author     Roger Pau Monné <royger@FreeBSD.org>  2019-04-24 14:21:41 +0000
committer  Roger Pau Monné <royger@FreeBSD.org>  2019-04-24 14:21:41 +0000
commit     7931facd63ae99271e2f2fd744b0bbe005083541 (patch)
tree       88d390cfeefb807d355396d2c9d3a80a9e6281b0 /emulators/xen-kernel
parent     88743b8d4778ae7c86495babf66d1be3671a7042 (diff)
download   ports-7931facd63ae99271e2f2fd744b0bbe005083541.tar.gz
           ports-7931facd63ae99271e2f2fd744b0bbe005083541.zip
Diffstat (limited to 'emulators/xen-kernel')
-rw-r--r--  emulators/xen-kernel/Makefile | 50
-rw-r--r--  emulators/xen-kernel/distinfo | 6
-rw-r--r--  emulators/xen-kernel/files/0001-pci-treat-class-0-devices-as-endpoints.patch | 50
-rw-r--r--  emulators/xen-kernel/files/0001-pvh-dom0-fix-deadlock-in-GSI-mapping.patch | 115
-rw-r--r--  emulators/xen-kernel/files/0001-vpci-msi-split-code-to-bind-pirq.patch | 123
-rw-r--r--  emulators/xen-kernel/files/0001-x86-HVM-improve-MTRR-load-checks.patch | 86
-rw-r--r--  emulators/xen-kernel/files/0001-x86-dom0-add-extra-RAM-regions-as-UNUSABLE-for-PVH-m.patch | 94
-rw-r--r--  emulators/xen-kernel/files/0001-x86-dom0-propagate-PVH-vlapic-EOIs-to-hardware.patch | 39
-rw-r--r--  emulators/xen-kernel/files/0001-x86-mm-locks-remove-trailing-whitespace.patch | 101
-rw-r--r--  emulators/xen-kernel/files/0001-x86-mtrr-introduce-mask-to-get-VCNT-from-MTRRcap-MSR.patch | 87
-rw-r--r--  emulators/xen-kernel/files/0001-x86-pvh-change-the-order-of-the-iommu-initialization.patch | 53
-rw-r--r--  emulators/xen-kernel/files/0001-x86-replace-usage-in-the-linker-script.patch | 39
-rw-r--r--  emulators/xen-kernel/files/0002-vpci-msi-fix-update-of-bound-MSI-interrupts.patch | 94
-rw-r--r--  emulators/xen-kernel/files/0002-x86-mm-locks-convert-some-macros-to-inline-functions.patch | 210
-rw-r--r--  emulators/xen-kernel/files/0002-x86-mtrr-split-enabled-field-into-two-boolean-flags.patch | 198
-rw-r--r--  emulators/xen-kernel/files/0003-hvm-mtrr-add-emacs-local-variables-block-with-format.patch | 35
-rw-r--r--  emulators/xen-kernel/files/0003-x86-mm-locks-apply-a-bias-to-lock-levels-for-control.patch | 319
-rw-r--r--  emulators/xen-kernel/files/0004-hvm-mtrr-use-the-hardware-number-of-variable-ranges-.patch | 135
-rw-r--r--  emulators/xen-kernel/files/0005-hvm-mtrr-copy-hardware-state-for-Dom0.patch | 59
-rw-r--r--  emulators/xen-kernel/files/0006-libxc-pvh-set-default-MTRR-type-to-write-back.patch | 104
-rw-r--r--  emulators/xen-kernel/files/0007-docs-pvh-document-initial-MTRR-state.patch | 44
-rw-r--r--  emulators/xen-kernel/files/xen.4th | 99
-rw-r--r--  emulators/xen-kernel/files/xsa284.patch | 31
-rw-r--r--  emulators/xen-kernel/files/xsa287-4.11.patch | 328
-rw-r--r--  emulators/xen-kernel/files/xsa290-4.11-1.patch | 237
-rw-r--r--  emulators/xen-kernel/files/xsa290-4.11-2.patch | 71
-rw-r--r--  emulators/xen-kernel/files/xsa292.patch | 95
-rw-r--r--  emulators/xen-kernel/files/xsa293-4.11-1.patch | 317
-rw-r--r--  emulators/xen-kernel/files/xsa293-4.11-2.patch | 260
-rw-r--r--  emulators/xen-kernel/files/xsa294-4.11.patch | 71
-rw-r--r--  emulators/xen-kernel/pkg-message | 4
31 files changed, 5 insertions, 3549 deletions
diff --git a/emulators/xen-kernel/Makefile b/emulators/xen-kernel/Makefile
index ff6b4de3b3e1..4ffb7218cda3 100644
--- a/emulators/xen-kernel/Makefile
+++ b/emulators/xen-kernel/Makefile
@@ -1,8 +1,8 @@
# $FreeBSD$
PORTNAME= xen
-PORTVERSION= 4.11.1
-PORTREVISION= 1
+PORTVERSION= 4.12.0
+PORTREVISION= 0
CATEGORIES= emulators
MASTER_SITES= http://downloads.xenproject.org/release/xen/${PORTVERSION}/
PKGNAMESUFFIX= -kernel
@@ -22,53 +22,8 @@ MAKE_ARGS= clang=y PYTHON=${PYTHON_CMD} ARCH=x86_64
NO_MTREE= yes
STRIP= #
PLIST_FILES= /boot/xen \
- /boot/xen.4th \
lib/debug/boot/xen.debug
-# IOMMU fixes
-EXTRA_PATCHES= ${FILESDIR}/0001-pci-treat-class-0-devices-as-endpoints.patch:-p1
-# vPCI MSI fixes
-EXTRA_PATCHES+= ${FILESDIR}/0001-vpci-msi-split-code-to-bind-pirq.patch:-p1 \
- ${FILESDIR}/0002-vpci-msi-fix-update-of-bound-MSI-interrupts.patch:-p1
-# Add extra RAM regions to Dom0 memory map as UNUSABNLE
-EXTRA_PATCHES+= ${FILESDIR}/0001-x86-dom0-add-extra-RAM-regions-as-UNUSABLE-for-PVH-m.patch:-p1
-# MTRR guest setup for PVH
-EXTRA_PATCHES+= ${FILESDIR}/0001-x86-mtrr-introduce-mask-to-get-VCNT-from-MTRRcap-MSR.patch:-p1 \
- ${FILESDIR}/0001-x86-HVM-improve-MTRR-load-checks.patch:-p1 \
- ${FILESDIR}/0002-x86-mtrr-split-enabled-field-into-two-boolean-flags.patch:-p1 \
- ${FILESDIR}/0003-hvm-mtrr-add-emacs-local-variables-block-with-format.patch:-p1 \
- ${FILESDIR}/0004-hvm-mtrr-use-the-hardware-number-of-variable-ranges-.patch:-p1 \
- ${FILESDIR}/0005-hvm-mtrr-copy-hardware-state-for-Dom0.patch:-p1 \
- ${FILESDIR}/0006-libxc-pvh-set-default-MTRR-type-to-write-back.patch:-p1 \
- ${FILESDIR}/0007-docs-pvh-document-initial-MTRR-state.patch:-p1
-# Build with lld (LLVM linker)
-EXTRA_PATCHES+= ${FILESDIR}/0001-x86-replace-usage-in-the-linker-script.patch:-p1
-# Fix PVH Dom0 build with shadow paging
-EXTRA_PATCHES+= ${FILESDIR}/0001-x86-pvh-change-the-order-of-the-iommu-initialization.patch:-p1
-# Forward dom0 lapic EOIs to underlying hardware
-EXTRA_PATCHES+= ${FILESDIR}/0001-x86-dom0-propagate-PVH-vlapic-EOIs-to-hardware.patch:-p1
-# Fix deadlock in IO-APIC gsi mapping
-EXTRA_PATCHES+= ${FILESDIR}/0001-pvh-dom0-fix-deadlock-in-GSI-mapping.patch:-p1
-# Fix for migration/save
-EXTRA_PATCHES+= ${FILESDIR}/0001-x86-mm-locks-remove-trailing-whitespace.patch:-p1 \
- ${FILESDIR}/0002-x86-mm-locks-convert-some-macros-to-inline-functions.patch:-p1 \
- ${FILESDIR}/0003-x86-mm-locks-apply-a-bias-to-lock-levels-for-control.patch:-p1
-
-# XSA-284
-EXTRA_PATCHES+= ${FILESDIR}/xsa284.patch:-p1
-# XSA-287
-EXTRA_PATCHES+= ${FILESDIR}/xsa287-4.11.patch:-p1
-# XSA-290
-EXTRA_PATCHES+= ${FILESDIR}/xsa290-4.11-1.patch:-p1 \
- ${FILESDIR}/xsa290-4.11-2.patch:-p1
-# XSA-292
-EXTRA_PATCHES+= ${FILESDIR}/xsa292.patch:-p1
-# XSA-293
-EXTRA_PATCHES+= ${FILESDIR}/xsa293-4.11-1.patch:-p1 \
- ${FILESDIR}/xsa293-4.11-2.patch:-p1
-# XSA-294
-EXTRA_PATCHES+= ${FILESDIR}/xsa294-4.11.patch:-p1
-
.include <bsd.port.options.mk>
.if ${OPSYS} != FreeBSD
@@ -88,7 +43,6 @@ do-install:
${MKDIR} ${STAGEDIR}/boot
${MKDIR} ${STAGEDIR}${PREFIX}/lib/debug/boot/
${INSTALL_PROGRAM} ${WRKSRC}/xen/xen ${STAGEDIR}/boot
- ${INSTALL_DATA} ${FILESDIR}/xen.4th ${STAGEDIR}/boot
${INSTALL_DATA} ${WRKSRC}/xen/xen-syms ${STAGEDIR}${PREFIX}/lib/debug/boot/xen.debug
.include <bsd.port.mk>
diff --git a/emulators/xen-kernel/distinfo b/emulators/xen-kernel/distinfo
index a61ead1394a4..85ee9e3c004f 100644
--- a/emulators/xen-kernel/distinfo
+++ b/emulators/xen-kernel/distinfo
@@ -1,3 +1,3 @@
-TIMESTAMP = 1550823720
-SHA256 (xen-4.11.1.tar.gz) = be88cb2443761990efc1070d9718016561fe19066af232f9bfae572922897e59
-SIZE (xen-4.11.1.tar.gz) = 25152217
+TIMESTAMP = 1555345627
+SHA256 (xen-4.12.0.tar.gz) = 6e5455e4a58dcb2339bfcd2a89842728068b530aa62501843793f7cf743c4d64
+SIZE (xen-4.12.0.tar.gz) = 26949697
diff --git a/emulators/xen-kernel/files/0001-pci-treat-class-0-devices-as-endpoints.patch b/emulators/xen-kernel/files/0001-pci-treat-class-0-devices-as-endpoints.patch
deleted file mode 100644
index 502603a89f42..000000000000
--- a/emulators/xen-kernel/files/0001-pci-treat-class-0-devices-as-endpoints.patch
+++ /dev/null
@@ -1,50 +0,0 @@
-From 7495a5b6aa1c741964baf18a1cbdb8b8d71cce98 Mon Sep 17 00:00:00 2001
-From: Roger Pau Monne <roger.pau@citrix.com>
-Date: Tue, 8 May 2018 11:33:00 +0200
-Subject: [PATCH] pci: treat class 0 devices as endpoints
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-Class 0 devices are legacy pre PCI 2.0 devices that didn't have a
-class code. Treat them as endpoints, so that they can be handled by
-the IOMMU and properly passed-through to the hardware domain.
-
-Such device has been seen on a Super Micro server, lspci -vv reports:
-
-00:13.0 Non-VGA unclassified device: Intel Corporation Device a135 (rev 31)
- Subsystem: Super Micro Computer Inc Device 0931
- Flags: bus master, fast devsel, latency 0, IRQ 11
- Memory at df222000 (64-bit, non-prefetchable) [size=4K]
- Capabilities: [80] Power Management version 3
-
-Arguably this is not a legacy device (since this is a new server), but
-in any case Xen needs to deal with it.
-
-Suggested-by: Andrew Cooper <andrew.cooper3@citrix.com>
-Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
-Acked-by: Jan Beulich <jbeulich@suse.com>
----
- xen/drivers/passthrough/pci.c | 3 ++-
- 1 file changed, 2 insertions(+), 1 deletion(-)
-
-diff --git a/xen/drivers/passthrough/pci.c b/xen/drivers/passthrough/pci.c
-index 1db69d5b99..c4890a4295 100644
---- a/xen/drivers/passthrough/pci.c
-+++ b/xen/drivers/passthrough/pci.c
-@@ -927,10 +927,11 @@ enum pdev_type pdev_type(u16 seg, u8 bus, u8 devfn)
- case PCI_CLASS_BRIDGE_HOST:
- return DEV_TYPE_PCI_HOST_BRIDGE;
-
-- case 0x0000: case 0xffff:
-+ case 0xffff:
- return DEV_TYPE_PCI_UNKNOWN;
- }
-
-+ /* NB: treat legacy pre PCI 2.0 devices (class_device == 0) as endpoints. */
- return pos ? DEV_TYPE_PCIe_ENDPOINT : DEV_TYPE_PCI;
- }
-
---
-2.18.0
-
diff --git a/emulators/xen-kernel/files/0001-pvh-dom0-fix-deadlock-in-GSI-mapping.patch b/emulators/xen-kernel/files/0001-pvh-dom0-fix-deadlock-in-GSI-mapping.patch
deleted file mode 100644
index e2258d28d369..000000000000
--- a/emulators/xen-kernel/files/0001-pvh-dom0-fix-deadlock-in-GSI-mapping.patch
+++ /dev/null
@@ -1,115 +0,0 @@
-From 603ad88fe8a681a2c5408c3f432d7083dd1c41b1 Mon Sep 17 00:00:00 2001
-From: Roger Pau Monne <roger.pau@citrix.com>
-Date: Mon, 28 Jan 2019 15:22:45 +0100
-Subject: [PATCH] pvh/dom0: fix deadlock in GSI mapping
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-The current GSI mapping code can cause the following deadlock:
-
-(XEN) *** Dumping CPU0 host state: ***
-(XEN) ----[ Xen-4.12.0-rc x86_64 debug=y Tainted: C ]----
-[...]
-(XEN) Xen call trace:
-(XEN) [<ffff82d080239852>] vmac.c#_spin_lock_cb+0x32/0x70
-(XEN) [<ffff82d0802ed40f>] vmac.c#hvm_gsi_assert+0x2f/0x60 <- pick hvm.irq_lock
-(XEN) [<ffff82d080255cc9>] io.c#hvm_dirq_assist+0xd9/0x130 <- pick event_lock
-(XEN) [<ffff82d080255b4b>] io.c#dpci_softirq+0xdb/0x120
-(XEN) [<ffff82d080238ce6>] softirq.c#__do_softirq+0x46/0xa0
-(XEN) [<ffff82d08026f955>] domain.c#idle_loop+0x35/0x90
-(XEN)
-[...]
-(XEN) *** Dumping CPU3 host state: ***
-(XEN) ----[ Xen-4.12.0-rc x86_64 debug=y Tainted: C ]----
-[...]
-(XEN) Xen call trace:
-(XEN) [<ffff82d08023985d>] vmac.c#_spin_lock_cb+0x3d/0x70
-(XEN) [<ffff82d080281fc8>] vmac.c#allocate_and_map_gsi_pirq+0xc8/0x130 <- pick event_lock
-(XEN) [<ffff82d0802f44c0>] vioapic.c#vioapic_hwdom_map_gsi+0x80/0x130
-(XEN) [<ffff82d0802f4399>] vioapic.c#vioapic_write_redirent+0x119/0x1c0 <- pick hvm.irq_lock
-(XEN) [<ffff82d0802f4075>] vioapic.c#vioapic_write+0x35/0x40
-(XEN) [<ffff82d0802e96a2>] vmac.c#hvm_process_io_intercept+0xd2/0x230
-(XEN) [<ffff82d0802e9842>] vmac.c#hvm_io_intercept+0x22/0x50
-(XEN) [<ffff82d0802dbe9b>] emulate.c#hvmemul_do_io+0x21b/0x3c0
-(XEN) [<ffff82d0802db302>] emulate.c#hvmemul_do_io_buffer+0x32/0x70
-(XEN) [<ffff82d0802dcd29>] emulate.c#hvmemul_do_mmio_buffer+0x29/0x30
-(XEN) [<ffff82d0802dcc19>] emulate.c#hvmemul_phys_mmio_access+0xf9/0x1b0
-(XEN) [<ffff82d0802dc6d0>] emulate.c#hvmemul_linear_mmio_access+0xf0/0x180
-(XEN) [<ffff82d0802de971>] emulate.c#hvmemul_linear_mmio_write+0x21/0x30
-(XEN) [<ffff82d0802de742>] emulate.c#linear_write+0xa2/0x100
-(XEN) [<ffff82d0802dce15>] emulate.c#hvmemul_write+0xb5/0x120
-(XEN) [<ffff82d0802babba>] vmac.c#x86_emulate+0x132aa/0x149a0
-(XEN) [<ffff82d0802c04f9>] vmac.c#x86_emulate_wrapper+0x29/0x70
-(XEN) [<ffff82d0802db570>] emulate.c#_hvm_emulate_one+0x50/0x140
-(XEN) [<ffff82d0802e9e31>] vmac.c#hvm_emulate_one_insn+0x41/0x100
-(XEN) [<ffff82d080345066>] guest_4.o#sh_page_fault__guest_4+0x976/0xd30
-(XEN) [<ffff82d08030cc69>] vmac.c#vmx_vmexit_handler+0x949/0xea0
-(XEN) [<ffff82d08031411a>] vmac.c#vmx_asm_vmexit_handler+0xfa/0x270
-
-In order to solve it move the vioapic_hwdom_map_gsi outside of the
-locked region in vioapic_write_redirent. vioapic_hwdom_map_gsi will
-not access any of the vioapic fields, so there's no need to call the
-function holding the hvm.irq_lock.
-
-Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
-Reviewed-by: Wei Liu <wei.liu2@citrix.com>
-Reviewed-by: Jan Beulich <jbeulich@suse.com>
-Release-acked-by: Juergen Gross <jgross@suse.com>
----
- xen/arch/x86/hvm/vioapic.c | 32 ++++++++++++++++++--------------
- 1 file changed, 18 insertions(+), 14 deletions(-)
-
-diff --git a/xen/arch/x86/hvm/vioapic.c b/xen/arch/x86/hvm/vioapic.c
-index 2b74f92d51..2d71c33c1c 100644
---- a/xen/arch/x86/hvm/vioapic.c
-+++ b/xen/arch/x86/hvm/vioapic.c
-@@ -236,20 +236,6 @@ static void vioapic_write_redirent(
-
- *pent = ent;
-
-- if ( is_hardware_domain(d) && unmasked )
-- {
-- int ret;
--
-- ret = vioapic_hwdom_map_gsi(gsi, ent.fields.trig_mode,
-- ent.fields.polarity);
-- if ( ret )
-- {
-- /* Mask the entry again. */
-- pent->fields.mask = 1;
-- unmasked = 0;
-- }
-- }
--
- if ( gsi == 0 )
- {
- vlapic_adjust_i8259_target(d);
-@@ -266,6 +252,24 @@ static void vioapic_write_redirent(
-
- spin_unlock(&d->arch.hvm.irq_lock);
-
-+ if ( is_hardware_domain(d) && unmasked )
-+ {
-+ /*
-+ * NB: don't call vioapic_hwdom_map_gsi while holding hvm.irq_lock
-+ * since it can cause deadlocks as event_lock is taken by
-+ * allocate_and_map_gsi_pirq, and that will invert the locking order
-+ * used by other parts of the code.
-+ */
-+ int ret = vioapic_hwdom_map_gsi(gsi, ent.fields.trig_mode,
-+ ent.fields.polarity);
-+ if ( ret )
-+ {
-+ gprintk(XENLOG_ERR,
-+ "unable to bind gsi %u to hardware domain: %d\n", gsi, ret);
-+ unmasked = 0;
-+ }
-+ }
-+
- if ( gsi == 0 || unmasked )
- pt_may_unmask_irq(d, NULL);
- }
---
-2.17.2 (Apple Git-113)
-
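
The traces in the patch above show a classic ABBA inversion: the dpci softirq path takes hvm.irq_lock and then event_lock, while the vioapic write path took them in the opposite order. The fix is the standard one of finishing the locked update and dropping the first lock before calling into code that acquires the second. A minimal self-contained sketch of that pattern, with pthread mutexes and invented function names standing in for the hypervisor's (not Xen code):

#include <pthread.h>
#include <stdio.h>

/* Stand-ins for d->arch.hvm.irq_lock and d->event_lock. */
static pthread_mutex_t irq_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t event_lock = PTHREAD_MUTEX_INITIALIZER;

/* Takes event_lock internally, like allocate_and_map_gsi_pirq(). */
static int map_gsi(unsigned int gsi)
{
    pthread_mutex_lock(&event_lock);
    printf("binding gsi %u\n", gsi);
    pthread_mutex_unlock(&event_lock);
    return 0;
}

/* Buggy shape: map_gsi() runs under irq_lock, giving the order
 * irq_lock -> event_lock, while the softirq path uses
 * event_lock -> irq_lock -- an ABBA deadlock under contention. */
static void write_redirent_buggy(unsigned int gsi)
{
    pthread_mutex_lock(&irq_lock);
    /* ... update the redirection entry ... */
    map_gsi(gsi);
    pthread_mutex_unlock(&irq_lock);
}

/* Fixed shape, as in the patch: do the locked update first, then
 * call out with no locks held so the order cannot invert. */
static void write_redirent_fixed(unsigned int gsi)
{
    pthread_mutex_lock(&irq_lock);
    /* ... update the redirection entry ... */
    pthread_mutex_unlock(&irq_lock);

    if (map_gsi(gsi))
        fprintf(stderr, "unable to bind gsi %u\n", gsi);
}

int main(void)
{
    write_redirent_buggy(2);  /* harmless single-threaded */
    write_redirent_fixed(2);
    return 0;
}
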
diff --git a/emulators/xen-kernel/files/0001-vpci-msi-split-code-to-bind-pirq.patch b/emulators/xen-kernel/files/0001-vpci-msi-split-code-to-bind-pirq.patch
deleted file mode 100644
index 21b8ae0442d1..000000000000
--- a/emulators/xen-kernel/files/0001-vpci-msi-split-code-to-bind-pirq.patch
+++ /dev/null
@@ -1,123 +0,0 @@
-From 9109e5afb99012244e9fbe7f44e7010950051443 Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com>
-Date: Mon, 2 Jul 2018 13:07:26 +0200
-Subject: [PATCH 1/2] vpci/msi: split code to bind pirq
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-And put it in a separate update function. This is required in order to
-improve binding of MSI PIRQs when using vPCI.
-
-No functional change.
-
-Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
-Reviewed-by: Jan Beulich <jbeulich@suse.com>
----
- xen/arch/x86/hvm/vmsi.c | 73 +++++++++++++++++++++++++----------------
- 1 file changed, 45 insertions(+), 28 deletions(-)
-
-diff --git a/xen/arch/x86/hvm/vmsi.c b/xen/arch/x86/hvm/vmsi.c
-index 5ab7387d78..acadc23f8d 100644
---- a/xen/arch/x86/hvm/vmsi.c
-+++ b/xen/arch/x86/hvm/vmsi.c
-@@ -663,6 +663,42 @@ void vpci_msi_arch_mask(struct vpci_msi *msi, const struct pci_dev *pdev,
- vpci_mask_pirq(pdev->domain, msi->arch.pirq + entry, mask);
- }
-
-+static int vpci_msi_update(const struct pci_dev *pdev, uint32_t data,
-+ uint64_t address, unsigned int vectors,
-+ unsigned int pirq, uint32_t mask)
-+{
-+ unsigned int i;
-+
-+ ASSERT(pcidevs_locked());
-+
-+ for ( i = 0; i < vectors; i++ )
-+ {
-+ uint8_t vector = MASK_EXTR(data, MSI_DATA_VECTOR_MASK);
-+ uint8_t vector_mask = 0xff >> (8 - fls(vectors) + 1);
-+ struct xen_domctl_bind_pt_irq bind = {
-+ .machine_irq = pirq + i,
-+ .irq_type = PT_IRQ_TYPE_MSI,
-+ .u.msi.gvec = (vector & ~vector_mask) |
-+ ((vector + i) & vector_mask),
-+ .u.msi.gflags = msi_gflags(data, address, (mask >> i) & 1),
-+ };
-+ int rc = pt_irq_create_bind(pdev->domain, &bind);
-+
-+ if ( rc )
-+ {
-+ gdprintk(XENLOG_ERR,
-+ "%04x:%02x:%02x.%u: failed to bind PIRQ %u: %d\n",
-+ pdev->seg, pdev->bus, PCI_SLOT(pdev->devfn),
-+ PCI_FUNC(pdev->devfn), pirq + i, rc);
-+ while ( bind.machine_irq-- > pirq )
-+ pt_irq_destroy_bind(pdev->domain, &bind);
-+ return rc;
-+ }
-+ }
-+
-+ return 0;
-+}
-+
- static int vpci_msi_enable(const struct pci_dev *pdev, uint32_t data,
- uint64_t address, unsigned int nr,
- paddr_t table_base, uint32_t mask)
-@@ -674,7 +710,7 @@ static int vpci_msi_enable(const struct pci_dev *pdev, uint32_t data,
- .table_base = table_base,
- .entry_nr = nr,
- };
-- unsigned int i, vectors = table_base ? 1 : nr;
-+ unsigned vectors = table_base ? 1 : nr;
- int rc, pirq = INVALID_PIRQ;
-
- /* Get a PIRQ. */
-@@ -690,36 +726,17 @@ static int vpci_msi_enable(const struct pci_dev *pdev, uint32_t data,
- return rc;
- }
-
-- for ( i = 0; i < vectors; i++ )
-+ pcidevs_lock();
-+ rc = vpci_msi_update(pdev, data, address, vectors, pirq, mask);
-+ if ( rc )
- {
-- uint8_t vector = MASK_EXTR(data, MSI_DATA_VECTOR_MASK);
-- uint8_t vector_mask = 0xff >> (8 - fls(vectors) + 1);
-- struct xen_domctl_bind_pt_irq bind = {
-- .machine_irq = pirq + i,
-- .irq_type = PT_IRQ_TYPE_MSI,
-- .u.msi.gvec = (vector & ~vector_mask) |
-- ((vector + i) & vector_mask),
-- .u.msi.gflags = msi_gflags(data, address, (mask >> i) & 1),
-- };
--
-- pcidevs_lock();
-- rc = pt_irq_create_bind(pdev->domain, &bind);
-- if ( rc )
-- {
-- gdprintk(XENLOG_ERR,
-- "%04x:%02x:%02x.%u: failed to bind PIRQ %u: %d\n",
-- pdev->seg, pdev->bus, PCI_SLOT(pdev->devfn),
-- PCI_FUNC(pdev->devfn), pirq + i, rc);
-- while ( bind.machine_irq-- > pirq )
-- pt_irq_destroy_bind(pdev->domain, &bind);
-- spin_lock(&pdev->domain->event_lock);
-- unmap_domain_pirq(pdev->domain, pirq);
-- spin_unlock(&pdev->domain->event_lock);
-- pcidevs_unlock();
-- return rc;
-- }
-+ spin_lock(&pdev->domain->event_lock);
-+ unmap_domain_pirq(pdev->domain, pirq);
-+ spin_unlock(&pdev->domain->event_lock);
- pcidevs_unlock();
-+ return rc;
- }
-+ pcidevs_unlock();
-
- return pirq;
- }
---
-2.18.0
-
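
The detail worth noting in the hunk the patch moves is the guest-vector arithmetic: multi-message MSI requires a naturally aligned block of vectors, so vector i keeps the high bits of the base vector and substitutes the low log2(nvec) bits with the vector index. A standalone sketch of that computation; fls_u() is a local stand-in for Xen's fls(), and all names are illustrative:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* Position of the highest set bit, 1-based, like Xen's fls(). */
static int fls_u(unsigned int x)
{
    int r = 0;

    while (x) {
        r++;
        x >>= 1;
    }
    return r;
}

/* Vector i = base with its low log2(nvec) bits replaced by
 * (base + i) -- the same arithmetic vpci_msi_update() performs. */
static uint8_t guest_vector(uint8_t base, unsigned int nvec, unsigned int i)
{
    uint8_t mask = 0xff >> (8 - fls_u(nvec) + 1);

    assert(i < nvec);
    return (base & ~mask) | ((base + i) & mask);
}

int main(void)
{
    /* A 4-vector MSI based at 0x50 yields 0x50..0x53. */
    for (unsigned int i = 0; i < 4; i++)
        printf("vector %u -> %#x\n", i, guest_vector(0x50, 4, i));
    return 0;
}
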
diff --git a/emulators/xen-kernel/files/0001-x86-HVM-improve-MTRR-load-checks.patch b/emulators/xen-kernel/files/0001-x86-HVM-improve-MTRR-load-checks.patch
deleted file mode 100644
index 9770ddd67c86..000000000000
--- a/emulators/xen-kernel/files/0001-x86-HVM-improve-MTRR-load-checks.patch
+++ /dev/null
@@ -1,86 +0,0 @@
-From 76159f10b174d8a5cd4c50213a9d21fcc0e9441d Mon Sep 17 00:00:00 2001
-From: Jan Beulich <jbeulich@suse.com>
-Date: Mon, 16 Jul 2018 15:08:02 +0200
-Subject: [PATCH 1/7] x86/HVM: improve MTRR load checks
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-We should not assume that the incoming set of values contains exactly
-MTRR_VCNT variable range MSRs. Permit a smaller amount and reject a
-bigger one. As a result the save path then also needs to no longer use
-a fixed upper bound, in turn requiring unused space in the save record
-to be zeroed up front.
-
-Also slightly refine types where appropriate.
-
-Signed-off-by: Jan Beulich <jbeulich@suse.com>
-[switch to use MASK_EXTR to get VCNT]
-Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
----
- xen/arch/x86/hvm/mtrr.c | 28 ++++++++++++++++++----------
- 1 file changed, 18 insertions(+), 10 deletions(-)
-
-diff --git a/xen/arch/x86/hvm/mtrr.c b/xen/arch/x86/hvm/mtrr.c
-index c2927fb437..a636012388 100644
---- a/xen/arch/x86/hvm/mtrr.c
-+++ b/xen/arch/x86/hvm/mtrr.c
-@@ -673,22 +673,22 @@ int hvm_set_mem_pinned_cacheattr(struct domain *d, uint64_t gfn_start,
-
- static int hvm_save_mtrr_msr(struct domain *d, hvm_domain_context_t *h)
- {
-- int i;
- struct vcpu *v;
-- struct hvm_hw_mtrr hw_mtrr;
-- struct mtrr_state *mtrr_state;
-+
- /* save mtrr&pat */
- for_each_vcpu(d, v)
- {
-- mtrr_state = &v->arch.hvm_vcpu.mtrr;
-+ const struct mtrr_state *mtrr_state = &v->arch.hvm_vcpu.mtrr;
-+ struct hvm_hw_mtrr hw_mtrr = {
-+ .msr_mtrr_def_type = mtrr_state->def_type |
-+ (mtrr_state->enabled << 10),
-+ .msr_mtrr_cap = mtrr_state->mtrr_cap,
-+ };
-+ unsigned int i;
-
- hvm_get_guest_pat(v, &hw_mtrr.msr_pat_cr);
-
-- hw_mtrr.msr_mtrr_def_type = mtrr_state->def_type
-- | (mtrr_state->enabled << 10);
-- hw_mtrr.msr_mtrr_cap = mtrr_state->mtrr_cap;
--
-- for ( i = 0; i < MTRR_VCNT; i++ )
-+ for ( i = 0; i < MASK_EXTR(hw_mtrr.msr_mtrr_cap, MTRRcap_VCNT); i++ )
- {
- /* save physbase */
- hw_mtrr.msr_mtrr_var[i*2] =
-@@ -726,6 +726,14 @@ static int hvm_load_mtrr_msr(struct domain *d, hvm_domain_context_t *h)
- if ( hvm_load_entry(MTRR, h, &hw_mtrr) != 0 )
- return -EINVAL;
-
-+ if ( MASK_EXTR(hw_mtrr.msr_mtrr_cap, MTRRcap_VCNT) > MTRR_VCNT )
-+ {
-+ dprintk(XENLOG_G_ERR,
-+ "HVM restore: %pv: too many (%lu) variable range MTRRs\n",
-+ v, MASK_EXTR(hw_mtrr.msr_mtrr_cap, MTRRcap_VCNT));
-+ return -EINVAL;
-+ }
-+
- mtrr_state = &v->arch.hvm_vcpu.mtrr;
-
- hvm_set_guest_pat(v, hw_mtrr.msr_pat_cr);
-@@ -735,7 +743,7 @@ static int hvm_load_mtrr_msr(struct domain *d, hvm_domain_context_t *h)
- for ( i = 0; i < NUM_FIXED_MSR; i++ )
- mtrr_fix_range_msr_set(d, mtrr_state, i, hw_mtrr.msr_mtrr_fixed[i]);
-
-- for ( i = 0; i < MTRR_VCNT; i++ )
-+ for ( i = 0; i < MASK_EXTR(hw_mtrr.msr_mtrr_cap, MTRRcap_VCNT); i++ )
- {
- mtrr_var_range_msr_set(d, mtrr_state,
- MSR_IA32_MTRR_PHYSBASE(i),
---
-2.18.0
-
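
In other words, the save side now sizes its loop by the guest's advertised VCNT and zero-fills the rest of the record up front, while the load side rejects any record claiming more variable ranges than the host supports. A simplified sketch of both halves, using a trimmed-down stand-in for struct hvm_hw_mtrr and an assumed host limit:

#include <stdint.h>
#include <stdio.h>

#define MTRR_VCNT 8                     /* hypothetical host maximum */

struct hvm_hw_mtrr {                    /* trimmed-down stand-in */
    uint64_t msr_mtrr_cap;
    uint64_t msr_mtrr_var[MTRR_VCNT * 2];
};

/* Save path per the patch: zero-initialize the record so slots
 * beyond the guest's VCNT stay zeroed, then write only the ranges
 * the guest actually has. */
static void save_mtrr(struct hvm_hw_mtrr *rec, uint64_t cap,
                      const uint64_t *base, const uint64_t *mask)
{
    unsigned int i, vcnt = cap & 0xff;  /* VCNT: low byte of MTRRcap */

    *rec = (struct hvm_hw_mtrr){ .msr_mtrr_cap = cap };
    for (i = 0; i < vcnt; i++) {
        rec->msr_mtrr_var[i * 2]     = base[i];
        rec->msr_mtrr_var[i * 2 + 1] = mask[i];
    }
}

/* Load path: accept fewer ranges than supported, reject more. */
static int load_mtrr(const struct hvm_hw_mtrr *rec)
{
    unsigned int vcnt = rec->msr_mtrr_cap & 0xff;

    if (vcnt > MTRR_VCNT) {
        fprintf(stderr, "too many (%u) variable range MTRRs\n", vcnt);
        return -1;                      /* -EINVAL in the hypervisor */
    }
    /* ... restore vcnt base/mask pairs ... */
    return 0;
}

int main(void)
{
    struct hvm_hw_mtrr rec;
    uint64_t base[2] = { 0xc0000000, 0 }, mask[2] = { 0xfff00000, 0 };

    save_mtrr(&rec, 2, base, mask);     /* guest advertises VCNT=2 */
    printf("load: %d\n", load_mtrr(&rec));
    return 0;
}
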
diff --git a/emulators/xen-kernel/files/0001-x86-dom0-add-extra-RAM-regions-as-UNUSABLE-for-PVH-m.patch b/emulators/xen-kernel/files/0001-x86-dom0-add-extra-RAM-regions-as-UNUSABLE-for-PVH-m.patch
deleted file mode 100644
index 11808d48eae9..000000000000
--- a/emulators/xen-kernel/files/0001-x86-dom0-add-extra-RAM-regions-as-UNUSABLE-for-PVH-m.patch
+++ /dev/null
@@ -1,94 +0,0 @@
-From e8e58be2b77708fd4d6ba6bca3f70bc507fde4be Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com>
-Date: Tue, 26 Jun 2018 08:48:14 +0200
-Subject: [PATCH] x86/dom0: add extra RAM regions as UNUSABLE for PVH memory
- map
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-When running as PVH Dom0 the native memory map is used in order to
-craft a tailored memory map for Dom0 taking into account it's memory
-limit.
-
-Dom0 memory is always going to be smaller than the total amount
-of memory present on the host, so in order to prevent Dom0 from
-relocating PCI BARs over RAM regions mark all the RAM regions not
-available to Dom0 as UNUSABLE in the memory map.
-
-Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
-Acked-by: Jan Beulich <jbeulich@suse.com>
----
- xen/arch/x86/hvm/dom0_build.c | 25 +++++++++++++++++--------
- 1 file changed, 17 insertions(+), 8 deletions(-)
-
-diff --git a/xen/arch/x86/hvm/dom0_build.c b/xen/arch/x86/hvm/dom0_build.c
-index b237508072..e2b5d48e03 100644
---- a/xen/arch/x86/hvm/dom0_build.c
-+++ b/xen/arch/x86/hvm/dom0_build.c
-@@ -314,8 +314,10 @@ static __init void pvh_setup_e820(struct domain *d, unsigned long nr_pages)
-
- /*
- * Craft the e820 memory map for Dom0 based on the hardware e820 map.
-+ * Add an extra entry in case we have to split a RAM entry into a RAM and a
-+ * UNUSABLE one in order to truncate it.
- */
-- d->arch.e820 = xzalloc_array(struct e820entry, e820.nr_map);
-+ d->arch.e820 = xzalloc_array(struct e820entry, e820.nr_map + 1);
- if ( !d->arch.e820 )
- panic("Unable to allocate memory for Dom0 e820 map");
- entry_guest = d->arch.e820;
-@@ -323,19 +325,20 @@ static __init void pvh_setup_e820(struct domain *d, unsigned long nr_pages)
- /* Clamp e820 memory map to match the memory assigned to Dom0 */
- for ( i = 0, entry = e820.map; i < e820.nr_map; i++, entry++ )
- {
-+ *entry_guest = *entry;
-+
- if ( entry->type != E820_RAM )
-- {
-- *entry_guest = *entry;
- goto next;
-- }
-
- if ( nr_pages == cur_pages )
- {
- /*
-- * We already have all the assigned memory,
-- * skip this entry
-+ * We already have all the requested memory, turn this RAM region
-+ * into a UNUSABLE region in order to prevent Dom0 from placing
-+ * BARs in this area.
- */
-- continue;
-+ entry_guest->type = E820_UNUSABLE;
-+ goto next;
- }
-
- /*
-@@ -358,6 +361,12 @@ static __init void pvh_setup_e820(struct domain *d, unsigned long nr_pages)
- {
- /* Truncate region */
- entry_guest->size = (nr_pages - cur_pages) << PAGE_SHIFT;
-+ /* Add the remaining of the RAM region as UNUSABLE. */
-+ entry_guest++;
-+ d->arch.nr_e820++;
-+ entry_guest->type = E820_UNUSABLE;
-+ entry_guest->addr = start + ((nr_pages - cur_pages) << PAGE_SHIFT);
-+ entry_guest->size = end - entry_guest->addr;
- cur_pages = nr_pages;
- }
- else
-@@ -367,9 +376,9 @@ static __init void pvh_setup_e820(struct domain *d, unsigned long nr_pages)
- next:
- d->arch.nr_e820++;
- entry_guest++;
-+ ASSERT(d->arch.nr_e820 <= e820.nr_map + 1);
- }
- ASSERT(cur_pages == nr_pages);
-- ASSERT(d->arch.nr_e820 <= e820.nr_map);
- }
-
- static int __init pvh_setup_p2m(struct domain *d)
---
-2.18.0
-
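
The mechanics of the patch: when the Dom0 memory quota runs out partway through a host RAM entry, the entry is split into a truncated RAM piece plus an UNUSABLE tail, which is why one extra array slot is allocated. A self-contained sketch of that split, with simplified types and an invented helper name:

#include <stdint.h>
#include <stdio.h>

#define E820_RAM       1
#define E820_UNUSABLE  5
#define PAGE_SHIFT     12

struct e820entry {
    uint64_t addr;
    uint64_t size;
    uint32_t type;
};

/* Clamp one host entry to the remaining Dom0 page quota, exposing
 * any RAM beyond it as UNUSABLE so the guest won't place PCI BARs
 * there.  Returns the number of output entries (1 or 2). */
static unsigned int clamp_ram_entry(const struct e820entry *in,
                                    struct e820entry out[2],
                                    uint64_t remaining_pages)
{
    uint64_t keep = remaining_pages << PAGE_SHIFT;

    out[0] = *in;
    if (in->type != E820_RAM || keep >= in->size)
        return 1;                     /* fits: copy verbatim */

    if (keep == 0) {                  /* quota already used up */
        out[0].type = E820_UNUSABLE;
        return 1;
    }

    out[0].size = keep;               /* truncated RAM region */
    out[1].addr = in->addr + keep;    /* tail becomes UNUSABLE */
    out[1].size = in->size - keep;
    out[1].type = E820_UNUSABLE;
    return 2;
}

int main(void)
{
    struct e820entry ram = { 0x100000, 256 << 20, E820_RAM }, out[2];
    unsigned int n = clamp_ram_entry(&ram, out, 0x8000); /* 128MiB left */

    for (unsigned int i = 0; i < n; i++)
        printf("%#llx-%#llx type %u\n",
               (unsigned long long)out[i].addr,
               (unsigned long long)(out[i].addr + out[i].size),
               out[i].type);
    return 0;
}
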
diff --git a/emulators/xen-kernel/files/0001-x86-dom0-propagate-PVH-vlapic-EOIs-to-hardware.patch b/emulators/xen-kernel/files/0001-x86-dom0-propagate-PVH-vlapic-EOIs-to-hardware.patch
deleted file mode 100644
index aaf3a2da22c7..000000000000
--- a/emulators/xen-kernel/files/0001-x86-dom0-propagate-PVH-vlapic-EOIs-to-hardware.patch
+++ /dev/null
@@ -1,39 +0,0 @@
-From 19d2bce1c3cbfdc636c142cdf0ae38795f2202dd Mon Sep 17 00:00:00 2001
-From: Roger Pau Monne <roger.pau@citrix.com>
-Date: Thu, 14 Feb 2019 14:41:03 +0100
-Subject: [PATCH for-4.12] x86/dom0: propagate PVH vlapic EOIs to hardware
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-Current check for MSI EIO is missing a special case for PVH Dom0,
-which doesn't have a hvm_irq_dpci struct but requires EIOs to be
-forwarded to the physical lapic for passed-through devices.
-
-Add a short-circuit to allow EOIs from PVH Dom0 to be propagated.
-
-Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
----
-Cc: Jan Beulich <jbeulich@suse.com>
-Cc: Juergen Gross <jgross@suse.com>
----
- xen/drivers/passthrough/io.c | 3 ++-
- 1 file changed, 2 insertions(+), 1 deletion(-)
-
-diff --git a/xen/drivers/passthrough/io.c b/xen/drivers/passthrough/io.c
-index a6eb8a4336..4290c7c710 100644
---- a/xen/drivers/passthrough/io.c
-+++ b/xen/drivers/passthrough/io.c
-@@ -869,7 +869,8 @@ static int _hvm_dpci_msi_eoi(struct domain *d,
-
- void hvm_dpci_msi_eoi(struct domain *d, int vector)
- {
-- if ( !iommu_enabled || !hvm_domain_irq(d)->dpci )
-+ if ( !iommu_enabled ||
-+ (!hvm_domain_irq(d)->dpci && !is_hardware_domain(d)) )
- return;
-
- spin_lock(&d->event_lock);
---
-2.17.2 (Apple Git-113)
-
diff --git a/emulators/xen-kernel/files/0001-x86-mm-locks-remove-trailing-whitespace.patch b/emulators/xen-kernel/files/0001-x86-mm-locks-remove-trailing-whitespace.patch
deleted file mode 100644
index 6f6210f9ad03..000000000000
--- a/emulators/xen-kernel/files/0001-x86-mm-locks-remove-trailing-whitespace.patch
+++ /dev/null
@@ -1,101 +0,0 @@
-From 468937da985661e5cd1d6b2df6d6ab2d1fb1e5e4 Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com>
-Date: Tue, 12 Mar 2019 12:21:03 +0100
-Subject: [PATCH 1/3] x86/mm-locks: remove trailing whitespace
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-No functional change.
-
-Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
-Reviewed-by: George Dunlap <george.dunlap@citrix.com>
----
- xen/arch/x86/mm/mm-locks.h | 24 ++++++++++++------------
- 1 file changed, 12 insertions(+), 12 deletions(-)
-
-diff --git a/xen/arch/x86/mm/mm-locks.h b/xen/arch/x86/mm/mm-locks.h
-index e5fceb2d2e..6c15b9a4cc 100644
---- a/xen/arch/x86/mm/mm-locks.h
-+++ b/xen/arch/x86/mm/mm-locks.h
-@@ -3,11 +3,11 @@
- *
- * Spinlocks used by the code in arch/x86/mm.
- *
-- * Copyright (c) 2011 Citrix Systems, inc.
-+ * Copyright (c) 2011 Citrix Systems, inc.
- * Copyright (c) 2007 Advanced Micro Devices (Wei Huang)
- * Copyright (c) 2006-2007 XenSource Inc.
- * Copyright (c) 2006 Michael A Fetterman
-- *
-+ *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
-@@ -41,7 +41,7 @@ static inline void mm_lock_init(mm_lock_t *l)
- l->unlock_level = 0;
- }
-
--static inline int mm_locked_by_me(mm_lock_t *l)
-+static inline int mm_locked_by_me(mm_lock_t *l)
- {
- return (l->lock.recurse_cpu == current->processor);
- }
-@@ -67,7 +67,7 @@ do { \
-
- static inline void _mm_lock(mm_lock_t *l, const char *func, int level, int rec)
- {
-- if ( !((mm_locked_by_me(l)) && rec) )
-+ if ( !((mm_locked_by_me(l)) && rec) )
- __check_lock_level(level);
- spin_lock_recursive(&l->lock);
- if ( l->lock.recurse_cnt == 1 )
-@@ -186,7 +186,7 @@ static inline void mm_unlock(mm_lock_t *l)
- spin_unlock_recursive(&l->lock);
- }
-
--static inline void mm_enforce_order_unlock(int unlock_level,
-+static inline void mm_enforce_order_unlock(int unlock_level,
- unsigned short *recurse_count)
- {
- if ( recurse_count )
-@@ -310,7 +310,7 @@ declare_mm_rwlock(altp2m);
- #define gfn_locked_by_me(p,g) p2m_locked_by_me(p)
-
- /* PoD lock (per-p2m-table)
-- *
-+ *
- * Protects private PoD data structs: entry and cache
- * counts, page lists, sweep parameters. */
-
-@@ -322,7 +322,7 @@ declare_mm_lock(pod)
-
- /* Page alloc lock (per-domain)
- *
-- * This is an external lock, not represented by an mm_lock_t. However,
-+ * This is an external lock, not represented by an mm_lock_t. However,
- * pod code uses it in conjunction with the p2m lock, and expecting
- * the ordering which we enforce here.
- * The lock is not recursive. */
-@@ -338,13 +338,13 @@ declare_mm_order_constraint(page_alloc)
- * For shadow pagetables, this lock protects
- * - all changes to shadow page table pages
- * - the shadow hash table
-- * - the shadow page allocator
-+ * - the shadow page allocator
- * - all changes to guest page table pages
- * - all changes to the page_info->tlbflush_timestamp
-- * - the page_info->count fields on shadow pages
-- *
-- * For HAP, it protects the NPT/EPT tables and mode changes.
-- *
-+ * - the page_info->count fields on shadow pages
-+ *
-+ * For HAP, it protects the NPT/EPT tables and mode changes.
-+ *
- * It also protects the log-dirty bitmap from concurrent accesses (and
- * teardowns, etc). */
-
---
-2.17.2 (Apple Git-113)
-
diff --git a/emulators/xen-kernel/files/0001-x86-mtrr-introduce-mask-to-get-VCNT-from-MTRRcap-MSR.patch b/emulators/xen-kernel/files/0001-x86-mtrr-introduce-mask-to-get-VCNT-from-MTRRcap-MSR.patch
deleted file mode 100644
index d8f8e38bafba..000000000000
--- a/emulators/xen-kernel/files/0001-x86-mtrr-introduce-mask-to-get-VCNT-from-MTRRcap-MSR.patch
+++ /dev/null
@@ -1,87 +0,0 @@
-From f7c587fa1341b59f4ff654bd7e55e162f3513130 Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com>
-Date: Thu, 5 Jul 2018 15:28:56 +0200
-Subject: [PATCH] x86/mtrr: introduce mask to get VCNT from MTRRcap MSR
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-No functional change.
-
-Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
-Reviewed-by: Jan Beulich <jbeulich@suse.com>
----
- xen/arch/x86/cpu/mtrr/main.c | 2 +-
- xen/arch/x86/hvm/mtrr.c | 8 ++++----
- xen/include/asm-x86/msr-index.h | 2 ++
- 3 files changed, 7 insertions(+), 5 deletions(-)
-
-diff --git a/xen/arch/x86/cpu/mtrr/main.c b/xen/arch/x86/cpu/mtrr/main.c
-index 56f71a6e1f..e9df53f00d 100644
---- a/xen/arch/x86/cpu/mtrr/main.c
-+++ b/xen/arch/x86/cpu/mtrr/main.c
-@@ -95,7 +95,7 @@ static void __init set_num_var_ranges(void)
- config = 2;
- else if (is_cpu(CENTAUR))
- config = 8;
-- num_var_ranges = config & 0xff;
-+ num_var_ranges = MASK_EXTR(config, MTRRcap_VCNT);
- }
-
- static void __init init_table(void)
-diff --git a/xen/arch/x86/hvm/mtrr.c b/xen/arch/x86/hvm/mtrr.c
-index c78e5c17ad..c2927fb437 100644
---- a/xen/arch/x86/hvm/mtrr.c
-+++ b/xen/arch/x86/hvm/mtrr.c
-@@ -78,7 +78,7 @@ static uint8_t __read_mostly pat_entry_tbl[PAT_TYPE_NUMS] =
- bool_t is_var_mtrr_overlapped(const struct mtrr_state *m)
- {
- unsigned int seg, i;
-- unsigned int num_var_ranges = (uint8_t)m->mtrr_cap;
-+ unsigned int num_var_ranges = MASK_EXTR(m->mtrr_cap, MTRRcap_VCNT);
-
- for ( i = 0; i < num_var_ranges; i++ )
- {
-@@ -193,7 +193,7 @@ static int get_mtrr_type(const struct mtrr_state *m,
- uint8_t overlap_mtrr = 0;
- uint8_t overlap_mtrr_pos = 0;
- uint64_t mask = -(uint64_t)PAGE_SIZE << order;
-- unsigned int seg, num_var_ranges = m->mtrr_cap & 0xff;
-+ unsigned int seg, num_var_ranges = MASK_EXTR(m->mtrr_cap, MTRRcap_VCNT);
-
- if ( unlikely(!(m->enabled & 0x2)) )
- return MTRR_TYPE_UNCACHABLE;
-@@ -483,7 +483,7 @@ bool mtrr_pat_not_equal(const struct vcpu *vd, const struct vcpu *vs)
-
- if ( md->enabled & 2 )
- {
-- unsigned int num_var_ranges = (uint8_t)md->mtrr_cap;
-+ unsigned int num_var_ranges = MASK_EXTR(md->mtrr_cap, MTRRcap_VCNT);
-
- /* Test default type MSR. */
- if ( md->def_type != ms->def_type )
-@@ -499,7 +499,7 @@ bool mtrr_pat_not_equal(const struct vcpu *vd, const struct vcpu *vs)
- return true;
-
- /* Test variable ranges. */
-- if ( num_var_ranges != (uint8_t)ms->mtrr_cap ||
-+ if ( num_var_ranges != MASK_EXTR(ms->mtrr_cap, MTRRcap_VCNT) ||
- memcmp(md->var_ranges, ms->var_ranges,
- num_var_ranges * sizeof(*md->var_ranges)) )
- return true;
-diff --git a/xen/include/asm-x86/msr-index.h b/xen/include/asm-x86/msr-index.h
-index 8fbccc88a7..95bb66916c 100644
---- a/xen/include/asm-x86/msr-index.h
-+++ b/xen/include/asm-x86/msr-index.h
-@@ -60,6 +60,8 @@
- #define ATM_LNC_C6_AUTO_DEMOTE (1UL << 25)
-
- #define MSR_MTRRcap 0x000000fe
-+#define MTRRcap_VCNT 0x000000ff
-+
- #define MSR_IA32_BBL_CR_CTL 0x00000119
-
- #define MSR_IA32_SYSENTER_CS 0x00000174
---
-2.18.0
-
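
MASK_EXTR derives the shift amount from the mask's lowest set bit, so MASK_EXTR(cap, MTRRcap_VCNT) reads the VCNT byte without a hand-written shift like the config & 0xff it replaces. A minimal sketch of the idiom; the macro bodies below mirror Xen's lib.h helpers but are reconstructed here and should be treated as illustrative:

#include <stdint.h>
#include <stdio.h>

#define MTRRcap_VCNT 0x000000ffULL

/* Extract/insert a bit-field described by a contiguous mask; the
 * divide/multiply by the mask's lowest set bit ((m) & -(m)) is the
 * shift, so the field position lives only in the mask constant. */
#define MASK_EXTR(v, m) (((v) & (m)) / ((m) & -(m)))
#define MASK_INSR(v, m) (((v) * ((m) & -(m))) & (m))

int main(void)
{
    uint64_t mtrr_cap = 0x508;  /* WC bit set, VCNT = 8 */

    printf("VCNT = %llu\n",
           (unsigned long long)MASK_EXTR(mtrr_cap, MTRRcap_VCNT));
    printf("cap  = %#llx\n",
           (unsigned long long)MASK_INSR(10, MTRRcap_VCNT));
    return 0;
}
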
diff --git a/emulators/xen-kernel/files/0001-x86-pvh-change-the-order-of-the-iommu-initialization.patch b/emulators/xen-kernel/files/0001-x86-pvh-change-the-order-of-the-iommu-initialization.patch
deleted file mode 100644
index 7e7fe935df25..000000000000
--- a/emulators/xen-kernel/files/0001-x86-pvh-change-the-order-of-the-iommu-initialization.patch
+++ /dev/null
@@ -1,53 +0,0 @@
-From ec3d58041829e0747d94efa11a44467c3e083b60 Mon Sep 17 00:00:00 2001
-From: Roger Pau Monne <roger.pau@citrix.com>
-Date: Tue, 24 Jul 2018 13:12:18 +0200
-Subject: [PATCH] x86/pvh: change the order of the iommu initialization for
- Dom0
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-The iommu initialization will also create MMIO mappings in the Dom0
-p2m, so the paging memory pool needs to be allocated or else iommu
-initialization will fail.
-
-Move the call to init the iommu after the Dom0 p2m has been setup in
-order to solve this.
-
-Note that issues caused by this wrong ordering have only been seen
-when using shadow paging.
-
-Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
-Acked-by: Jan Beulich <jbeulich@suse.com>
----
-Cc: Jan Beulich <jbeulich@suse.com>
-Cc: Andrew Cooper <andrew.cooper3@citrix.com>
----
- xen/arch/x86/hvm/dom0_build.c | 4 ++--
- 1 file changed, 2 insertions(+), 2 deletions(-)
-
-diff --git a/xen/arch/x86/hvm/dom0_build.c b/xen/arch/x86/hvm/dom0_build.c
-index 9a833fa4b9..f0cd63b1ec 100644
---- a/xen/arch/x86/hvm/dom0_build.c
-+++ b/xen/arch/x86/hvm/dom0_build.c
-@@ -1093,8 +1093,6 @@ int __init dom0_construct_pvh(struct domain *d, const module_t *image,
-
- printk(XENLOG_INFO "*** Building a PVH Dom%d ***\n", d->domain_id);
-
-- iommu_hwdom_init(d);
--
- rc = pvh_setup_p2m(d);
- if ( rc )
- {
-@@ -1102,6 +1100,8 @@ int __init dom0_construct_pvh(struct domain *d, const module_t *image,
- return rc;
- }
-
-+ iommu_hwdom_init(d);
-+
- rc = pvh_load_kernel(d, image, image_headroom, initrd, bootstrap_map(image),
- cmdline, &entry, &start_info);
- if ( rc )
---
-2.18.0
-
diff --git a/emulators/xen-kernel/files/0001-x86-replace-usage-in-the-linker-script.patch b/emulators/xen-kernel/files/0001-x86-replace-usage-in-the-linker-script.patch
deleted file mode 100644
index 4b1f808d9a7a..000000000000
--- a/emulators/xen-kernel/files/0001-x86-replace-usage-in-the-linker-script.patch
+++ /dev/null
@@ -1,39 +0,0 @@
-From e21ba44f771226a5f6f0ce269aabcfb019eae539 Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com>
-Date: Thu, 12 Jul 2018 10:48:18 +0200
-Subject: [PATCH] x86: replace '||' usage in the linker script
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-With '|'. The result is the same, and the later works with lld. Fixes
-the following error when building Xen with lld:
-
-ld -melf_x86_64_fbsd -T xen.lds -N prelink.o --build-id=sha1 \
- /root/src/xen/xen/common/symbols-dummy.o -o /root/src/xen/xen/.xen-syms.0
-ld: error: xen.lds:260: malformed number: |
->>> ASSERT(__image_base__ > (((((((((261 >> 8) * 0xffff000000000000) | (261 << 39))) + ((1 << 39) / 2)) + (64 << 30)) + (1 << 30)) + (1 << 30))) ||
->>> ^
-
-Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
-Reviewed-by: Jan Beulich <jbeulich@suse.com>
----
- xen/arch/x86/xen.lds.S | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
-diff --git a/xen/arch/x86/xen.lds.S b/xen/arch/x86/xen.lds.S
-index 70afedd31d..326e885402 100644
---- a/xen/arch/x86/xen.lds.S
-+++ b/xen/arch/x86/xen.lds.S
-@@ -331,7 +331,7 @@ SECTIONS
- .comment 0 : { *(.comment) }
- }
-
--ASSERT(__image_base__ > XEN_VIRT_START ||
-+ASSERT(__image_base__ > XEN_VIRT_START |
- __2M_rwdata_end <= XEN_VIRT_END - NR_CPUS * PAGE_SIZE,
- "Xen image overlaps stubs area")
-
---
-2.18.0
-
diff --git a/emulators/xen-kernel/files/0002-vpci-msi-fix-update-of-bound-MSI-interrupts.patch b/emulators/xen-kernel/files/0002-vpci-msi-fix-update-of-bound-MSI-interrupts.patch
deleted file mode 100644
index 80e7816adee9..000000000000
--- a/emulators/xen-kernel/files/0002-vpci-msi-fix-update-of-bound-MSI-interrupts.patch
+++ /dev/null
@@ -1,94 +0,0 @@
-From 1e34ed7174cce6ab37e420dda9452267301fb7d2 Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com>
-Date: Mon, 2 Jul 2018 13:07:55 +0200
-Subject: [PATCH 2/2] vpci/msi: fix update of bound MSI interrupts
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-Current update process of already bound MSI interrupts is wrong
-because unmap_domain_pirq calls pci_disable_msi, which disables MSI
-interrupts on the device. On the other hand map_domain_pirq doesn't
-enable MSI, so the current update process of already enabled MSI
-entries is wrong because MSI control bit will be disabled by
-unmap_domain_pirq and not re-enabled by map_domain_pirq.
-
-In order to fix this avoid unmapping the PIRQs and just update the
-binding of the PIRQ. A new arch helper to do that is introduced.
-
-Note that MSI-X is not affected because unmap_domain_pirq only
-disables the MSI enable control bit for the MSI case, for MSI-X the
-bit is left untouched by unmap_domain_pirq.
-
-Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
-Acked-by: Jan Beulich <jbeulich@suse.com>
----
- xen/arch/x86/hvm/vmsi.c | 23 +++++++++++++++++++++++
- xen/drivers/vpci/msi.c | 3 +--
- xen/include/xen/vpci.h | 2 ++
- 3 files changed, 26 insertions(+), 2 deletions(-)
-
-diff --git a/xen/arch/x86/hvm/vmsi.c b/xen/arch/x86/hvm/vmsi.c
-index acadc23f8d..3001d5c488 100644
---- a/xen/arch/x86/hvm/vmsi.c
-+++ b/xen/arch/x86/hvm/vmsi.c
-@@ -699,6 +699,29 @@ static int vpci_msi_update(const struct pci_dev *pdev, uint32_t data,
- return 0;
- }
-
-+int vpci_msi_arch_update(struct vpci_msi *msi, const struct pci_dev *pdev)
-+{
-+ int rc;
-+
-+ ASSERT(msi->arch.pirq != INVALID_PIRQ);
-+
-+ pcidevs_lock();
-+ rc = vpci_msi_update(pdev, msi->data, msi->address, msi->vectors,
-+ msi->arch.pirq, msi->mask);
-+ if ( rc )
-+ {
-+ spin_lock(&pdev->domain->event_lock);
-+ unmap_domain_pirq(pdev->domain, msi->arch.pirq);
-+ spin_unlock(&pdev->domain->event_lock);
-+ pcidevs_unlock();
-+ msi->arch.pirq = INVALID_PIRQ;
-+ return rc;
-+ }
-+ pcidevs_unlock();
-+
-+ return 0;
-+}
-+
- static int vpci_msi_enable(const struct pci_dev *pdev, uint32_t data,
- uint64_t address, unsigned int nr,
- paddr_t table_base, uint32_t mask)
-diff --git a/xen/drivers/vpci/msi.c b/xen/drivers/vpci/msi.c
-index ad26c38a92..8f15ad7bf2 100644
---- a/xen/drivers/vpci/msi.c
-+++ b/xen/drivers/vpci/msi.c
-@@ -87,8 +87,7 @@ static void update_msi(const struct pci_dev *pdev, struct vpci_msi *msi)
- if ( !msi->enabled )
- return;
-
-- vpci_msi_arch_disable(msi, pdev);
-- if ( vpci_msi_arch_enable(msi, pdev, msi->vectors) )
-+ if ( vpci_msi_arch_update(msi, pdev) )
- msi->enabled = false;
- }
-
-diff --git a/xen/include/xen/vpci.h b/xen/include/xen/vpci.h
-index 72d2225a97..af2b8580ee 100644
---- a/xen/include/xen/vpci.h
-+++ b/xen/include/xen/vpci.h
-@@ -159,6 +159,8 @@ int __must_check vpci_msi_arch_enable(struct vpci_msi *msi,
- const struct pci_dev *pdev,
- unsigned int vectors);
- void vpci_msi_arch_disable(struct vpci_msi *msi, const struct pci_dev *pdev);
-+int __must_check vpci_msi_arch_update(struct vpci_msi *msi,
-+ const struct pci_dev *pdev);
- void vpci_msi_arch_init(struct vpci_msi *msi);
- void vpci_msi_arch_print(const struct vpci_msi *msi);
-
---
-2.18.0
-
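
The key point in the message above is a hardware side effect: unmap_domain_pirq ends up in pci_disable_msi and clears the device's MSI enable bit, while map_domain_pirq never sets it back. A toy model of the two update flows, with all names invented, just to make the lost-enable-bit failure mode concrete:

#include <stdbool.h>
#include <stdio.h>

/* Toy stand-in for the device's MSI enable control bit. */
static bool msi_enable_bit = true;

static void unmap_pirq(void)  { msi_enable_bit = false; /* pci_disable_msi */ }
static void map_pirq(void)    { /* does NOT touch the enable bit */ }
static void rebind_pirq(void) { /* updates only the PIRQ binding */ }

/* Old flow: after this the device has MSI disabled even though the
 * guest believes it is still enabled. */
static void update_old(void)
{
    unmap_pirq();
    map_pirq();
}

/* New flow, in the spirit of vpci_msi_arch_update(): rebind the
 * already-mapped PIRQ without ever unmapping it. */
static void update_new(void)
{
    rebind_pirq();
}

int main(void)
{
    update_old();
    printf("old flow: enable bit %s\n", msi_enable_bit ? "set" : "lost");
    msi_enable_bit = true;
    update_new();
    printf("new flow: enable bit %s\n", msi_enable_bit ? "set" : "lost");
    return 0;
}
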
diff --git a/emulators/xen-kernel/files/0002-x86-mm-locks-convert-some-macros-to-inline-functions.patch b/emulators/xen-kernel/files/0002-x86-mm-locks-convert-some-macros-to-inline-functions.patch
deleted file mode 100644
index 86750bb16292..000000000000
--- a/emulators/xen-kernel/files/0002-x86-mm-locks-convert-some-macros-to-inline-functions.patch
+++ /dev/null
@@ -1,210 +0,0 @@
-From 45e260afe7ee0e6b18a7e64173a081eec6e056aa Mon Sep 17 00:00:00 2001
-From: Roger Pau Monne <roger.pau@citrix.com>
-Date: Tue, 12 Mar 2019 12:24:37 +0100
-Subject: [PATCH 2/3] x86/mm-locks: convert some macros to inline functions
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-And rename to have only one prefix underscore where applicable.
-
-No functional change.
-
-Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
-Reviewed-by: George Dunlap <george.dunlap@citrix.com>
----
- xen/arch/x86/mm/mm-locks.h | 98 ++++++++++++++++++++------------------
- 1 file changed, 52 insertions(+), 46 deletions(-)
-
-diff --git a/xen/arch/x86/mm/mm-locks.h b/xen/arch/x86/mm/mm-locks.h
-index 6c15b9a4cc..d3497713e9 100644
---- a/xen/arch/x86/mm/mm-locks.h
-+++ b/xen/arch/x86/mm/mm-locks.h
-@@ -29,7 +29,6 @@
-
- /* Per-CPU variable for enforcing the lock ordering */
- DECLARE_PER_CPU(int, mm_lock_level);
--#define __get_lock_level() (this_cpu(mm_lock_level))
-
- DECLARE_PERCPU_RWLOCK_GLOBAL(p2m_percpu_rwlock);
-
-@@ -46,43 +45,47 @@ static inline int mm_locked_by_me(mm_lock_t *l)
- return (l->lock.recurse_cpu == current->processor);
- }
-
-+static inline int _get_lock_level(void)
-+{
-+ return this_cpu(mm_lock_level);
-+}
-+
- /*
- * If you see this crash, the numbers printed are order levels defined
- * in this file.
- */
--#define __check_lock_level(l) \
--do { \
-- if ( unlikely(__get_lock_level() > (l)) ) \
-- { \
-- printk("mm locking order violation: %i > %i\n", \
-- __get_lock_level(), (l)); \
-- BUG(); \
-- } \
--} while(0)
--
--#define __set_lock_level(l) \
--do { \
-- __get_lock_level() = (l); \
--} while(0)
-+static inline void _check_lock_level(int l)
-+{
-+ if ( unlikely(_get_lock_level() > l) )
-+ {
-+ printk("mm locking order violation: %i > %i\n", _get_lock_level(), l);
-+ BUG();
-+ }
-+}
-+
-+static inline void _set_lock_level(int l)
-+{
-+ this_cpu(mm_lock_level) = l;
-+}
-
- static inline void _mm_lock(mm_lock_t *l, const char *func, int level, int rec)
- {
- if ( !((mm_locked_by_me(l)) && rec) )
-- __check_lock_level(level);
-+ _check_lock_level(level);
- spin_lock_recursive(&l->lock);
- if ( l->lock.recurse_cnt == 1 )
- {
- l->locker_function = func;
-- l->unlock_level = __get_lock_level();
-+ l->unlock_level = _get_lock_level();
- }
- else if ( (unlikely(!rec)) )
-- panic("mm lock already held by %s", l->locker_function);
-- __set_lock_level(level);
-+ panic("mm lock already held by %s\n", l->locker_function);
-+ _set_lock_level(level);
- }
-
- static inline void _mm_enforce_order_lock_pre(int level)
- {
-- __check_lock_level(level);
-+ _check_lock_level(level);
- }
-
- static inline void _mm_enforce_order_lock_post(int level, int *unlock_level,
-@@ -92,12 +95,12 @@ static inline void _mm_enforce_order_lock_post(int level, int *unlock_level,
- {
- if ( (*recurse_count)++ == 0 )
- {
-- *unlock_level = __get_lock_level();
-+ *unlock_level = _get_lock_level();
- }
- } else {
-- *unlock_level = __get_lock_level();
-+ *unlock_level = _get_lock_level();
- }
-- __set_lock_level(level);
-+ _set_lock_level(level);
- }
-
-
-@@ -118,12 +121,12 @@ static inline void _mm_write_lock(mm_rwlock_t *l, const char *func, int level)
- {
- if ( !mm_write_locked_by_me(l) )
- {
-- __check_lock_level(level);
-+ _check_lock_level(level);
- percpu_write_lock(p2m_percpu_rwlock, &l->lock);
- l->locker = get_processor_id();
- l->locker_function = func;
-- l->unlock_level = __get_lock_level();
-- __set_lock_level(level);
-+ l->unlock_level = _get_lock_level();
-+ _set_lock_level(level);
- }
- l->recurse_count++;
- }
-@@ -134,13 +137,13 @@ static inline void mm_write_unlock(mm_rwlock_t *l)
- return;
- l->locker = -1;
- l->locker_function = "nobody";
-- __set_lock_level(l->unlock_level);
-+ _set_lock_level(l->unlock_level);
- percpu_write_unlock(p2m_percpu_rwlock, &l->lock);
- }
-
- static inline void _mm_read_lock(mm_rwlock_t *l, int level)
- {
-- __check_lock_level(level);
-+ _check_lock_level(level);
- percpu_read_lock(p2m_percpu_rwlock, &l->lock);
- /* There's nowhere to store the per-CPU unlock level so we can't
- * set the lock level. */
-@@ -181,7 +184,7 @@ static inline void mm_unlock(mm_lock_t *l)
- if ( l->lock.recurse_cnt == 1 )
- {
- l->locker_function = "nobody";
-- __set_lock_level(l->unlock_level);
-+ _set_lock_level(l->unlock_level);
- }
- spin_unlock_recursive(&l->lock);
- }
-@@ -194,10 +197,10 @@ static inline void mm_enforce_order_unlock(int unlock_level,
- BUG_ON(*recurse_count == 0);
- if ( (*recurse_count)-- == 1 )
- {
-- __set_lock_level(unlock_level);
-+ _set_lock_level(unlock_level);
- }
- } else {
-- __set_lock_level(unlock_level);
-+ _set_lock_level(unlock_level);
- }
- }
-
-@@ -287,21 +290,24 @@ declare_mm_lock(altp2mlist)
-
- #define MM_LOCK_ORDER_altp2m 40
- declare_mm_rwlock(altp2m);
--#define p2m_lock(p) \
-- do { \
-- if ( p2m_is_altp2m(p) ) \
-- mm_write_lock(altp2m, &(p)->lock); \
-- else \
-- mm_write_lock(p2m, &(p)->lock); \
-- (p)->defer_flush++; \
-- } while (0)
--#define p2m_unlock(p) \
-- do { \
-- if ( --(p)->defer_flush == 0 ) \
-- p2m_unlock_and_tlb_flush(p); \
-- else \
-- mm_write_unlock(&(p)->lock); \
-- } while (0)
-+
-+static inline void p2m_lock(struct p2m_domain *p)
-+{
-+ if ( p2m_is_altp2m(p) )
-+ mm_write_lock(altp2m, &p->lock);
-+ else
-+ mm_write_lock(p2m, &p->lock);
-+ p->defer_flush++;
-+}
-+
-+static inline void p2m_unlock(struct p2m_domain *p)
-+{
-+ if ( --p->defer_flush == 0 )
-+ p2m_unlock_and_tlb_flush(p);
-+ else
-+ mm_write_unlock(&p->lock);
-+}
-+
- #define gfn_lock(p,g,o) p2m_lock(p)
- #define gfn_unlock(p,g,o) p2m_unlock(p)
- #define p2m_read_lock(p) mm_read_lock(p2m, &(p)->lock)
---
-2.17.2 (Apple Git-113)
-
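
For context, the helpers this patch converts implement Xen's mm lock ordering discipline: a per-CPU level records the order of the highest lock currently held, and acquiring a lower-ordered lock afterwards is a BUG(). A single-threaded sketch of that discipline, with invented helper names and illustrative level numbers:

#include <stdio.h>
#include <stdlib.h>

/* Per-CPU in Xen; a plain global suffices for a single-threaded
 * sketch of what _check_lock_level()/_set_lock_level() enforce. */
static int mm_lock_level;

static void check_lock_level(int l)
{
    if (mm_lock_level > l) {
        /* Taking a lower-ordered lock after a higher one would
         * permit an ABBA deadlock, so Xen BUG()s here. */
        fprintf(stderr, "mm locking order violation: %d > %d\n",
                mm_lock_level, l);
        abort();
    }
}

/* Returns the previous level, to be restored on unlock. */
static int lock_at_level(int l)
{
    int unlock_level = mm_lock_level;

    check_lock_level(l);
    /* ... actually acquire the lock here ... */
    mm_lock_level = l;
    return unlock_level;
}

static void unlock_to_level(int l)
{
    /* ... actually release the lock here ... */
    mm_lock_level = l;
}

int main(void)
{
    int prev = lock_at_level(16);    /* e.g. a p2m-class lock */
    int prev2 = lock_at_level(32);   /* a higher-ordered lock: OK */

    unlock_to_level(prev2);          /* release in reverse order */
    unlock_to_level(prev);
    lock_at_level(8);                /* nothing held again: OK */
    return 0;
}
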
diff --git a/emulators/xen-kernel/files/0002-x86-mtrr-split-enabled-field-into-two-boolean-flags.patch b/emulators/xen-kernel/files/0002-x86-mtrr-split-enabled-field-into-two-boolean-flags.patch
deleted file mode 100644
index 1ae66a2de005..000000000000
--- a/emulators/xen-kernel/files/0002-x86-mtrr-split-enabled-field-into-two-boolean-flags.patch
+++ /dev/null
@@ -1,198 +0,0 @@
-From 8ebc60e0274b770743e59256f665789d4308b188 Mon Sep 17 00:00:00 2001
-From: Jan Beulich <jbeulich@suse.com>
-Date: Mon, 16 Jul 2018 15:09:12 +0200
-Subject: [PATCH 2/7] x86/mtrr: split "enabled" field into two boolean flags
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-The code hopefully is more readable this way.
-
-Also switch have_fixed to bool, seeing that it already is used as a
-boolean.
-
-Signed-off-by: Jan Beulich <jbeulich@suse.com>
-[switched to use MASK_*]
-Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
----
- xen/arch/x86/cpu/mtrr/generic.c | 14 +++++++++-----
- xen/arch/x86/hvm/hvm.c | 6 ++++--
- xen/arch/x86/hvm/mtrr.c | 23 ++++++++++++++---------
- xen/include/asm-x86/msr-index.h | 2 ++
- xen/include/asm-x86/mtrr.h | 5 +++--
- 5 files changed, 32 insertions(+), 18 deletions(-)
-
-diff --git a/xen/arch/x86/cpu/mtrr/generic.c b/xen/arch/x86/cpu/mtrr/generic.c
-index 7ba0c3f0fe..09763654be 100644
---- a/xen/arch/x86/cpu/mtrr/generic.c
-+++ b/xen/arch/x86/cpu/mtrr/generic.c
-@@ -80,7 +80,8 @@ void __init get_mtrr_state(void)
-
- rdmsrl(MSR_MTRRdefType, msr_content);
- mtrr_state.def_type = (msr_content & 0xff);
-- mtrr_state.enabled = (msr_content & 0xc00) >> 10;
-+ mtrr_state.enabled = MASK_EXTR(msr_content, MTRRdefType_E);
-+ mtrr_state.fixed_enabled = MASK_EXTR(msr_content, MTRRdefType_FE);
-
- /* Store mtrr_cap for HVM MTRR virtualisation. */
- rdmsrl(MSR_MTRRcap, mtrr_state.mtrr_cap);
-@@ -159,7 +160,7 @@ static void __init print_mtrr_state(const char *level)
- unsigned int base = 0, step = 0x10000;
-
- printk("%sMTRR fixed ranges %sabled:\n", level,
-- mtrr_state.enabled & 1 ? "en" : "dis");
-+ mtrr_state.fixed_enabled ? "en" : "dis");
- for (; block->ranges; ++block, step >>= 2) {
- for (i = 0; i < block->ranges; ++i, fr += 8) {
- print_fixed(base, step, fr, level);
-@@ -169,7 +170,7 @@ static void __init print_mtrr_state(const char *level)
- print_fixed_last(level);
- }
- printk("%sMTRR variable ranges %sabled:\n", level,
-- mtrr_state.enabled & 2 ? "en" : "dis");
-+ mtrr_state.enabled ? "en" : "dis");
- width = (paddr_bits - PAGE_SHIFT + 3) / 4;
-
- for (i = 0; i < num_var_ranges; ++i) {
-@@ -383,8 +384,11 @@ static unsigned long set_mtrr_state(void)
- /* Set_mtrr_restore restores the old value of MTRRdefType,
- so to set it we fiddle with the saved value */
- if ((deftype & 0xff) != mtrr_state.def_type
-- || ((deftype & 0xc00) >> 10) != mtrr_state.enabled) {
-- deftype = (deftype & ~0xcff) | mtrr_state.def_type | (mtrr_state.enabled << 10);
-+ || MASK_EXTR(deftype, MTRRdefType_E) != mtrr_state.enabled
-+ || MASK_EXTR(deftype, MTRRdefType_FE) != mtrr_state.fixed_enabled) {
-+ deftype = (deftype & ~0xcff) | mtrr_state.def_type |
-+ MASK_INSR(mtrr_state.enabled, MTRRdefType_E) |
-+ MASK_INSR(mtrr_state.fixed_enabled, MTRRdefType_FE);
- change_mask |= MTRR_CHANGE_MASK_DEFTYPE;
- }
-
-diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
-index e022f5ab0e..3e2abeb274 100644
---- a/xen/arch/x86/hvm/hvm.c
-+++ b/xen/arch/x86/hvm/hvm.c
-@@ -3453,8 +3453,10 @@ int hvm_msr_read_intercept(unsigned int msr, uint64_t *msr_content)
- case MSR_MTRRdefType:
- if ( !d->arch.cpuid->basic.mtrr )
- goto gp_fault;
-- *msr_content = v->arch.hvm_vcpu.mtrr.def_type
-- | (v->arch.hvm_vcpu.mtrr.enabled << 10);
-+ *msr_content = v->arch.hvm_vcpu.mtrr.def_type |
-+ MASK_INSR(v->arch.hvm_vcpu.mtrr.enabled, MTRRdefType_E) |
-+ MASK_INSR(v->arch.hvm_vcpu.mtrr.fixed_enabled,
-+ MTRRdefType_FE);
- break;
- case MSR_MTRRfix64K_00000:
- if ( !d->arch.cpuid->basic.mtrr )
-diff --git a/xen/arch/x86/hvm/mtrr.c b/xen/arch/x86/hvm/mtrr.c
-index a636012388..d74b363851 100644
---- a/xen/arch/x86/hvm/mtrr.c
-+++ b/xen/arch/x86/hvm/mtrr.c
-@@ -195,11 +195,11 @@ static int get_mtrr_type(const struct mtrr_state *m,
- uint64_t mask = -(uint64_t)PAGE_SIZE << order;
- unsigned int seg, num_var_ranges = MASK_EXTR(m->mtrr_cap, MTRRcap_VCNT);
-
-- if ( unlikely(!(m->enabled & 0x2)) )
-+ if ( unlikely(!m->enabled) )
- return MTRR_TYPE_UNCACHABLE;
-
- pa &= mask;
-- if ( (pa < 0x100000) && (m->enabled & 1) )
-+ if ( (pa < 0x100000) && m->fixed_enabled )
- {
- /* Fixed range MTRR takes effect. */
- uint32_t addr = (uint32_t)pa, index;
-@@ -391,7 +391,8 @@ bool_t mtrr_def_type_msr_set(struct domain *d, struct mtrr_state *m,
- uint64_t msr_content)
- {
- uint8_t def_type = msr_content & 0xff;
-- uint8_t enabled = (msr_content >> 10) & 0x3;
-+ bool fixed_enabled = MASK_EXTR(msr_content, MTRRdefType_FE);
-+ bool enabled = MASK_EXTR(msr_content, MTRRdefType_E);
-
- if ( unlikely(!valid_mtrr_type(def_type)) )
- {
-@@ -406,10 +407,12 @@ bool_t mtrr_def_type_msr_set(struct domain *d, struct mtrr_state *m,
- return 0;
- }
-
-- if ( m->enabled != enabled || m->def_type != def_type )
-+ if ( m->enabled != enabled || m->fixed_enabled != fixed_enabled ||
-+ m->def_type != def_type )
- {
- m->enabled = enabled;
- m->def_type = def_type;
-+ m->fixed_enabled = fixed_enabled;
- memory_type_changed(d);
- }
-
-@@ -478,10 +481,10 @@ bool mtrr_pat_not_equal(const struct vcpu *vd, const struct vcpu *vs)
- const struct mtrr_state *md = &vd->arch.hvm_vcpu.mtrr;
- const struct mtrr_state *ms = &vs->arch.hvm_vcpu.mtrr;
-
-- if ( (md->enabled ^ ms->enabled) & 2 )
-+ if ( md->enabled != ms->enabled )
- return true;
-
-- if ( md->enabled & 2 )
-+ if ( md->enabled )
- {
- unsigned int num_var_ranges = MASK_EXTR(md->mtrr_cap, MTRRcap_VCNT);
-
-@@ -490,10 +493,10 @@ bool mtrr_pat_not_equal(const struct vcpu *vd, const struct vcpu *vs)
- return true;
-
- /* Test fixed ranges. */
-- if ( (md->enabled ^ ms->enabled) & 1 )
-+ if ( md->fixed_enabled != ms->fixed_enabled )
- return true;
-
-- if ( (md->enabled & 1) &&
-+ if ( md->fixed_enabled &&
- memcmp(md->fixed_ranges, ms->fixed_ranges,
- sizeof(md->fixed_ranges)) )
- return true;
-@@ -681,7 +684,9 @@ static int hvm_save_mtrr_msr(struct domain *d, hvm_domain_context_t *h)
- const struct mtrr_state *mtrr_state = &v->arch.hvm_vcpu.mtrr;
- struct hvm_hw_mtrr hw_mtrr = {
- .msr_mtrr_def_type = mtrr_state->def_type |
-- (mtrr_state->enabled << 10),
-+ MASK_INSR(mtrr_state->fixed_enabled,
-+ MTRRdefType_FE) |
-+ MASK_INSR(mtrr_state->enabled, MTRRdefType_E),
- .msr_mtrr_cap = mtrr_state->mtrr_cap,
- };
- unsigned int i;
-diff --git a/xen/include/asm-x86/msr-index.h b/xen/include/asm-x86/msr-index.h
-index 95bb66916c..94bccf73a1 100644
---- a/xen/include/asm-x86/msr-index.h
-+++ b/xen/include/asm-x86/msr-index.h
-@@ -98,6 +98,8 @@
- #define MSR_MTRRfix4K_F0000 0x0000026e
- #define MSR_MTRRfix4K_F8000 0x0000026f
- #define MSR_MTRRdefType 0x000002ff
-+#define MTRRdefType_FE (1u << 10)
-+#define MTRRdefType_E (1u << 11)
-
- #define MSR_IA32_DEBUGCTLMSR 0x000001d9
- #define IA32_DEBUGCTLMSR_LBR (1<<0) /* Last Branch Record */
-diff --git a/xen/include/asm-x86/mtrr.h b/xen/include/asm-x86/mtrr.h
-index 5cdc5d4fe3..b1f7af6396 100644
---- a/xen/include/asm-x86/mtrr.h
-+++ b/xen/include/asm-x86/mtrr.h
-@@ -50,8 +50,9 @@ struct mtrr_var_range {
- struct mtrr_state {
- struct mtrr_var_range *var_ranges;
- mtrr_type fixed_ranges[NUM_FIXED_RANGES];
-- unsigned char enabled;
-- unsigned char have_fixed;
-+ bool enabled;
-+ bool fixed_enabled;
-+ bool have_fixed;
- mtrr_type def_type;
-
- u64 mtrr_cap;
---
-2.18.0
-
diff --git a/emulators/xen-kernel/files/0003-hvm-mtrr-add-emacs-local-variables-block-with-format.patch b/emulators/xen-kernel/files/0003-hvm-mtrr-add-emacs-local-variables-block-with-format.patch
deleted file mode 100644
index f94363b1f028..000000000000
--- a/emulators/xen-kernel/files/0003-hvm-mtrr-add-emacs-local-variables-block-with-format.patch
+++ /dev/null
@@ -1,35 +0,0 @@
-From de3b31312248646394a78b837b8a02f2483cad02 Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com>
-Date: Mon, 16 Jul 2018 15:09:50 +0200
-Subject: [PATCH 3/7] hvm/mtrr: add emacs local variables block with formatting
- info
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
----
- xen/arch/x86/hvm/mtrr.c | 10 ++++++++++
- 1 file changed, 10 insertions(+)
-
-diff --git a/xen/arch/x86/hvm/mtrr.c b/xen/arch/x86/hvm/mtrr.c
-index d74b363851..7db0d473e8 100644
---- a/xen/arch/x86/hvm/mtrr.c
-+++ b/xen/arch/x86/hvm/mtrr.c
-@@ -871,3 +871,13 @@ int epte_get_entry_emt(struct domain *d, unsigned long gfn, mfn_t mfn,
-
- return MTRR_TYPE_UNCACHABLE;
- }
-+
-+/*
-+ * Local variables:
-+ * mode: C
-+ * c-file-style: "BSD"
-+ * c-basic-offset: 4
-+ * tab-width: 4
-+ * indent-tabs-mode: nil
-+ * End:
-+ */
---
-2.18.0
-
diff --git a/emulators/xen-kernel/files/0003-x86-mm-locks-apply-a-bias-to-lock-levels-for-control.patch b/emulators/xen-kernel/files/0003-x86-mm-locks-apply-a-bias-to-lock-levels-for-control.patch
deleted file mode 100644
index 4927a4fd7cee..000000000000
--- a/emulators/xen-kernel/files/0003-x86-mm-locks-apply-a-bias-to-lock-levels-for-control.patch
+++ /dev/null
@@ -1,319 +0,0 @@
-From efce89c1df5969486bef82eec05223a4a6522d2d Mon Sep 17 00:00:00 2001
-From: Roger Pau Monne <roger.pau@citrix.com>
-Date: Tue, 12 Mar 2019 12:25:21 +0100
-Subject: [PATCH 3/3] x86/mm-locks: apply a bias to lock levels for control
- domain
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-The paging_log_dirty_op function takes mm locks from a subject domain
-and then attempts to perform copy-to operations against the caller
-domain in order to copy the result of the hypercall into the
-caller-provided buffer.
-
-This works fine when the caller is a non-paging domain, but triggers a
-lock order panic when the caller is a paging domain: at the point
-where the copy-to operation is performed, the subject domain's paging
-lock is held, and the copy operation requires taking the caller's p2m
-lock, which has a lower level.
-
-Fix this limitation by adding a bias to the level of control domain mm
-locks, so that the lowest control domain mm lock always has a level
-greater than the highest unprivileged domain lock level. This allows
-locking the subject domain mm locks and then locking the control
-domain mm locks, while keeping the same lock ordering and the changes
-mostly confined to mm-locks.h.
-
-Note that so far only this flow (taking a subject domain's locks and
-then the control domain's ones) has been identified, but not all
-possible code paths have been inspected. Hence this solution attempts
-to be a non-intrusive fix for the problem at hand, without discarding
-further changes in the future if other valid code paths are found that
-require more complex lock level ordering.
-
-Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
-Reviewed-by: George Dunlap <george.dunlap@citrix.com>
----
- xen/arch/x86/mm/mm-locks.h | 119 +++++++++++++++++++++++--------------
- xen/arch/x86/mm/p2m-pod.c | 5 +-
- 2 files changed, 78 insertions(+), 46 deletions(-)
-
-diff --git a/xen/arch/x86/mm/mm-locks.h b/xen/arch/x86/mm/mm-locks.h
-index d3497713e9..d6c073dc5c 100644
---- a/xen/arch/x86/mm/mm-locks.h
-+++ b/xen/arch/x86/mm/mm-locks.h
-@@ -50,15 +50,35 @@ static inline int _get_lock_level(void)
- return this_cpu(mm_lock_level);
- }
-
-+#define MM_LOCK_ORDER_MAX 64
-+/*
-+ * Return the lock level taking the domain bias into account. If the domain is
-+ * privileged a bias of MM_LOCK_ORDER_MAX is applied to the lock level, so that
-+ * mm locks that belong to a control domain can be acquired after having
-+ * acquired mm locks of an unprivileged domain.
-+ *
-+ * This is required in order to use some hypercalls from a paging domain that
-+ * take locks of a subject domain and then attempt to copy data to/from the
-+ * caller domain.
-+ */
-+static inline int _lock_level(const struct domain *d, int l)
-+{
-+ ASSERT(l <= MM_LOCK_ORDER_MAX);
-+
-+ return l + (d && is_control_domain(d) ? MM_LOCK_ORDER_MAX : 0);
-+}
-+
- /*
- * If you see this crash, the numbers printed are order levels defined
- * in this file.
- */
--static inline void _check_lock_level(int l)
-+static inline void _check_lock_level(const struct domain *d, int l)
- {
-- if ( unlikely(_get_lock_level() > l) )
-+ int lvl = _lock_level(d, l);
-+
-+ if ( unlikely(_get_lock_level() > lvl) )
- {
-- printk("mm locking order violation: %i > %i\n", _get_lock_level(), l);
-+ printk("mm locking order violation: %i > %i\n", _get_lock_level(), lvl);
- BUG();
- }
- }
-@@ -68,10 +88,11 @@ static inline void _set_lock_level(int l)
- this_cpu(mm_lock_level) = l;
- }
-
--static inline void _mm_lock(mm_lock_t *l, const char *func, int level, int rec)
-+static inline void _mm_lock(const struct domain *d, mm_lock_t *l,
-+ const char *func, int level, int rec)
- {
- if ( !((mm_locked_by_me(l)) && rec) )
-- _check_lock_level(level);
-+ _check_lock_level(d, level);
- spin_lock_recursive(&l->lock);
- if ( l->lock.recurse_cnt == 1 )
- {
-@@ -80,16 +101,17 @@ static inline void _mm_lock(mm_lock_t *l, const char *func, int level, int rec)
- }
- else if ( (unlikely(!rec)) )
- panic("mm lock already held by %s\n", l->locker_function);
-- _set_lock_level(level);
-+ _set_lock_level(_lock_level(d, level));
- }
-
--static inline void _mm_enforce_order_lock_pre(int level)
-+static inline void _mm_enforce_order_lock_pre(const struct domain *d, int level)
- {
-- _check_lock_level(level);
-+ _check_lock_level(d, level);
- }
-
--static inline void _mm_enforce_order_lock_post(int level, int *unlock_level,
-- unsigned short *recurse_count)
-+static inline void _mm_enforce_order_lock_post(const struct domain *d, int level,
-+ int *unlock_level,
-+ unsigned short *recurse_count)
- {
- if ( recurse_count )
- {
-@@ -100,7 +122,7 @@ static inline void _mm_enforce_order_lock_post(int level, int *unlock_level,
- } else {
- *unlock_level = _get_lock_level();
- }
-- _set_lock_level(level);
-+ _set_lock_level(_lock_level(d, level));
- }
-
-
-@@ -117,16 +139,17 @@ static inline int mm_write_locked_by_me(mm_rwlock_t *l)
- return (l->locker == get_processor_id());
- }
-
--static inline void _mm_write_lock(mm_rwlock_t *l, const char *func, int level)
-+static inline void _mm_write_lock(const struct domain *d, mm_rwlock_t *l,
-+ const char *func, int level)
- {
- if ( !mm_write_locked_by_me(l) )
- {
-- _check_lock_level(level);
-+ _check_lock_level(d, level);
- percpu_write_lock(p2m_percpu_rwlock, &l->lock);
- l->locker = get_processor_id();
- l->locker_function = func;
- l->unlock_level = _get_lock_level();
-- _set_lock_level(level);
-+ _set_lock_level(_lock_level(d, level));
- }
- l->recurse_count++;
- }
-@@ -141,9 +164,10 @@ static inline void mm_write_unlock(mm_rwlock_t *l)
- percpu_write_unlock(p2m_percpu_rwlock, &l->lock);
- }
-
--static inline void _mm_read_lock(mm_rwlock_t *l, int level)
-+static inline void _mm_read_lock(const struct domain *d, mm_rwlock_t *l,
-+ int level)
- {
-- _check_lock_level(level);
-+ _check_lock_level(d, level);
- percpu_read_lock(p2m_percpu_rwlock, &l->lock);
- /* There's nowhere to store the per-CPU unlock level so we can't
- * set the lock level. */
-@@ -156,28 +180,32 @@ static inline void mm_read_unlock(mm_rwlock_t *l)
-
- /* This wrapper uses the line number to express the locking order below */
- #define declare_mm_lock(name) \
-- static inline void mm_lock_##name(mm_lock_t *l, const char *func, int rec)\
-- { _mm_lock(l, func, MM_LOCK_ORDER_##name, rec); }
-+ static inline void mm_lock_##name(const struct domain *d, mm_lock_t *l, \
-+ const char *func, int rec) \
-+ { _mm_lock(d, l, func, MM_LOCK_ORDER_##name, rec); }
- #define declare_mm_rwlock(name) \
-- static inline void mm_write_lock_##name(mm_rwlock_t *l, const char *func) \
-- { _mm_write_lock(l, func, MM_LOCK_ORDER_##name); } \
-- static inline void mm_read_lock_##name(mm_rwlock_t *l) \
-- { _mm_read_lock(l, MM_LOCK_ORDER_##name); }
-+ static inline void mm_write_lock_##name(const struct domain *d, \
-+ mm_rwlock_t *l, const char *func) \
-+ { _mm_write_lock(d, l, func, MM_LOCK_ORDER_##name); } \
-+ static inline void mm_read_lock_##name(const struct domain *d, \
-+ mm_rwlock_t *l) \
-+ { _mm_read_lock(d, l, MM_LOCK_ORDER_##name); }
- /* These capture the name of the calling function */
--#define mm_lock(name, l) mm_lock_##name(l, __func__, 0)
--#define mm_lock_recursive(name, l) mm_lock_##name(l, __func__, 1)
--#define mm_write_lock(name, l) mm_write_lock_##name(l, __func__)
--#define mm_read_lock(name, l) mm_read_lock_##name(l)
-+#define mm_lock(name, d, l) mm_lock_##name(d, l, __func__, 0)
-+#define mm_lock_recursive(name, d, l) mm_lock_##name(d, l, __func__, 1)
-+#define mm_write_lock(name, d, l) mm_write_lock_##name(d, l, __func__)
-+#define mm_read_lock(name, d, l) mm_read_lock_##name(d, l)
-
- /* This wrapper is intended for "external" locks which do not use
- * the mm_lock_t types. Such locks inside the mm code are also subject
- * to ordering constraints. */
--#define declare_mm_order_constraint(name) \
-- static inline void mm_enforce_order_lock_pre_##name(void) \
-- { _mm_enforce_order_lock_pre(MM_LOCK_ORDER_##name); } \
-- static inline void mm_enforce_order_lock_post_##name( \
-- int *unlock_level, unsigned short *recurse_count) \
-- { _mm_enforce_order_lock_post(MM_LOCK_ORDER_##name, unlock_level, recurse_count); } \
-+#define declare_mm_order_constraint(name) \
-+ static inline void mm_enforce_order_lock_pre_##name(const struct domain *d) \
-+ { _mm_enforce_order_lock_pre(d, MM_LOCK_ORDER_##name); } \
-+ static inline void mm_enforce_order_lock_post_##name(const struct domain *d,\
-+ int *unlock_level, unsigned short *recurse_count) \
-+ { _mm_enforce_order_lock_post(d, MM_LOCK_ORDER_##name, unlock_level, \
-+ recurse_count); }
-
- static inline void mm_unlock(mm_lock_t *l)
- {
-@@ -221,7 +249,7 @@ static inline void mm_enforce_order_unlock(int unlock_level,
-
- #define MM_LOCK_ORDER_nestedp2m 8
- declare_mm_lock(nestedp2m)
--#define nestedp2m_lock(d) mm_lock(nestedp2m, &(d)->arch.nested_p2m_lock)
-+#define nestedp2m_lock(d) mm_lock(nestedp2m, d, &(d)->arch.nested_p2m_lock)
- #define nestedp2m_unlock(d) mm_unlock(&(d)->arch.nested_p2m_lock)
-
- /* P2M lock (per-non-alt-p2m-table)
-@@ -260,9 +288,10 @@ declare_mm_rwlock(p2m);
-
- #define MM_LOCK_ORDER_per_page_sharing 24
- declare_mm_order_constraint(per_page_sharing)
--#define page_sharing_mm_pre_lock() mm_enforce_order_lock_pre_per_page_sharing()
-+#define page_sharing_mm_pre_lock() \
-+ mm_enforce_order_lock_pre_per_page_sharing(NULL)
- #define page_sharing_mm_post_lock(l, r) \
-- mm_enforce_order_lock_post_per_page_sharing((l), (r))
-+ mm_enforce_order_lock_post_per_page_sharing(NULL, (l), (r))
- #define page_sharing_mm_unlock(l, r) mm_enforce_order_unlock((l), (r))
-
- /* Alternate P2M list lock (per-domain)
-@@ -275,7 +304,8 @@ declare_mm_order_constraint(per_page_sharing)
-
- #define MM_LOCK_ORDER_altp2mlist 32
- declare_mm_lock(altp2mlist)
--#define altp2m_list_lock(d) mm_lock(altp2mlist, &(d)->arch.altp2m_list_lock)
-+#define altp2m_list_lock(d) mm_lock(altp2mlist, d, \
-+ &(d)->arch.altp2m_list_lock)
- #define altp2m_list_unlock(d) mm_unlock(&(d)->arch.altp2m_list_lock)
-
- /* P2M lock (per-altp2m-table)
-@@ -294,9 +324,9 @@ declare_mm_rwlock(altp2m);
- static inline void p2m_lock(struct p2m_domain *p)
- {
- if ( p2m_is_altp2m(p) )
-- mm_write_lock(altp2m, &p->lock);
-+ mm_write_lock(altp2m, p->domain, &p->lock);
- else
-- mm_write_lock(p2m, &p->lock);
-+ mm_write_lock(p2m, p->domain, &p->lock);
- p->defer_flush++;
- }
-
-@@ -310,7 +340,7 @@ static inline void p2m_unlock(struct p2m_domain *p)
-
- #define gfn_lock(p,g,o) p2m_lock(p)
- #define gfn_unlock(p,g,o) p2m_unlock(p)
--#define p2m_read_lock(p) mm_read_lock(p2m, &(p)->lock)
-+#define p2m_read_lock(p) mm_read_lock(p2m, (p)->domain, &(p)->lock)
- #define p2m_read_unlock(p) mm_read_unlock(&(p)->lock)
- #define p2m_locked_by_me(p) mm_write_locked_by_me(&(p)->lock)
- #define gfn_locked_by_me(p,g) p2m_locked_by_me(p)
-@@ -322,7 +352,7 @@ static inline void p2m_unlock(struct p2m_domain *p)
-
- #define MM_LOCK_ORDER_pod 48
- declare_mm_lock(pod)
--#define pod_lock(p) mm_lock(pod, &(p)->pod.lock)
-+#define pod_lock(p) mm_lock(pod, (p)->domain, &(p)->pod.lock)
- #define pod_unlock(p) mm_unlock(&(p)->pod.lock)
- #define pod_locked_by_me(p) mm_locked_by_me(&(p)->pod.lock)
-
-@@ -335,8 +365,9 @@ declare_mm_lock(pod)
-
- #define MM_LOCK_ORDER_page_alloc 56
- declare_mm_order_constraint(page_alloc)
--#define page_alloc_mm_pre_lock() mm_enforce_order_lock_pre_page_alloc()
--#define page_alloc_mm_post_lock(l) mm_enforce_order_lock_post_page_alloc(&(l), NULL)
-+#define page_alloc_mm_pre_lock(d) mm_enforce_order_lock_pre_page_alloc(d)
-+#define page_alloc_mm_post_lock(d, l) \
-+ mm_enforce_order_lock_post_page_alloc(d, &(l), NULL)
- #define page_alloc_mm_unlock(l) mm_enforce_order_unlock((l), NULL)
-
- /* Paging lock (per-domain)
-@@ -356,9 +387,9 @@ declare_mm_order_constraint(page_alloc)
-
- #define MM_LOCK_ORDER_paging 64
- declare_mm_lock(paging)
--#define paging_lock(d) mm_lock(paging, &(d)->arch.paging.lock)
-+#define paging_lock(d) mm_lock(paging, d, &(d)->arch.paging.lock)
- #define paging_lock_recursive(d) \
-- mm_lock_recursive(paging, &(d)->arch.paging.lock)
-+ mm_lock_recursive(paging, d, &(d)->arch.paging.lock)
- #define paging_unlock(d) mm_unlock(&(d)->arch.paging.lock)
- #define paging_locked_by_me(d) mm_locked_by_me(&(d)->arch.paging.lock)
-
-diff --git a/xen/arch/x86/mm/p2m-pod.c b/xen/arch/x86/mm/p2m-pod.c
-index 631e9aec33..725a2921d9 100644
---- a/xen/arch/x86/mm/p2m-pod.c
-+++ b/xen/arch/x86/mm/p2m-pod.c
-@@ -34,9 +34,10 @@
- /* Enforce lock ordering when grabbing the "external" page_alloc lock */
- static inline void lock_page_alloc(struct p2m_domain *p2m)
- {
-- page_alloc_mm_pre_lock();
-+ page_alloc_mm_pre_lock(p2m->domain);
- spin_lock(&(p2m->domain->page_alloc_lock));
-- page_alloc_mm_post_lock(p2m->domain->arch.page_alloc_unlock_level);
-+ page_alloc_mm_post_lock(p2m->domain,
-+ p2m->domain->arch.page_alloc_unlock_level);
- }
-
- static inline void unlock_page_alloc(struct p2m_domain *p2m)
---
-2.17.2 (Apple Git-113)
-
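To make the bias mechanism in the deleted patch above concrete, here is a self-contained toy model. The per-CPU level variable, the domain structure and the p2m order value (16, assumed) are stand-ins invented for the example; only the arithmetic mirrors the patch.

    #include <assert.h>
    #include <stdbool.h>
    #include <stdio.h>

    #define MM_LOCK_ORDER_MAX 64

    static int mm_lock_level;        /* stands in for the per-CPU level */

    struct domain { bool is_control; };

    /* Privileged domains get their lock levels biased upwards, so a
     * control-domain lock can always follow a subject-domain lock. */
    static int lock_level(const struct domain *d, int l)
    {
        assert(l <= MM_LOCK_ORDER_MAX);
        return l + (d && d->is_control ? MM_LOCK_ORDER_MAX : 0);
    }

    static void check_and_take(const struct domain *d, int l)
    {
        int lvl = lock_level(d, l);

        if (mm_lock_level > lvl) {
            printf("mm locking order violation: %d > %d\n",
                   mm_lock_level, lvl);
            assert(0);
        }
        mm_lock_level = lvl;
    }

    int main(void)
    {
        struct domain subject = { .is_control = false };
        struct domain control = { .is_control = true };

        /* paging_log_dirty_op: subject paging lock (order 64), then the
         * caller's p2m lock (order 16, assumed).  Without the bias,
         * 16 < 64 would trip the check; with it the control domain's
         * p2m lock sits at 16 + 64 = 80. */
        check_and_take(&subject, 64);
        check_and_take(&control, 16);
        printf("both locks taken, final level %d\n", mm_lock_level);
        return 0;
    }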
diff --git a/emulators/xen-kernel/files/0004-hvm-mtrr-use-the-hardware-number-of-variable-ranges-.patch b/emulators/xen-kernel/files/0004-hvm-mtrr-use-the-hardware-number-of-variable-ranges-.patch
deleted file mode 100644
index 8dc0d3a6acc5..000000000000
--- a/emulators/xen-kernel/files/0004-hvm-mtrr-use-the-hardware-number-of-variable-ranges-.patch
+++ /dev/null
@@ -1,135 +0,0 @@
-From e520d9e144ac4766aaa7ce55f1c49191a5ddefc8 Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com>
-Date: Mon, 16 Jul 2018 15:10:09 +0200
-Subject: [PATCH 4/7] hvm/mtrr: use the hardware number of variable ranges for
- Dom0
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-Expand the size of the variable ranges array to match the size of the
-underlying hardware; this is a preparatory change for copying the
-hardware MTRR state for Dom0.
-
-Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
-Reviewed-by: Jan Beulich <jbeulich@suse.com>
----
- xen/arch/x86/hvm/hvm.c | 12 +++++++++---
- xen/arch/x86/hvm/mtrr.c | 31 +++++++++++++++++++++++++++++--
- xen/include/asm-x86/mtrr.h | 3 +++
- 3 files changed, 41 insertions(+), 5 deletions(-)
-
-diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
-index 3e2abeb274..c7eb943ed3 100644
---- a/xen/arch/x86/hvm/hvm.c
-+++ b/xen/arch/x86/hvm/hvm.c
-@@ -3476,10 +3476,13 @@ int hvm_msr_read_intercept(unsigned int msr, uint64_t *msr_content)
- index = msr - MSR_MTRRfix4K_C0000;
- *msr_content = fixed_range_base[index + 3];
- break;
-- case MSR_IA32_MTRR_PHYSBASE(0)...MSR_IA32_MTRR_PHYSMASK(MTRR_VCNT-1):
-+ case MSR_IA32_MTRR_PHYSBASE(0)...MSR_IA32_MTRR_PHYSMASK(MTRR_VCNT_MAX - 1):
- if ( !d->arch.cpuid->basic.mtrr )
- goto gp_fault;
- index = msr - MSR_IA32_MTRR_PHYSBASE(0);
-+ if ( (index / 2) >=
-+ MASK_EXTR(v->arch.hvm_vcpu.mtrr.mtrr_cap, MTRRcap_VCNT) )
-+ goto gp_fault;
- *msr_content = var_range_base[index];
- break;
-
-@@ -3637,10 +3640,13 @@ int hvm_msr_write_intercept(unsigned int msr, uint64_t msr_content,
- index, msr_content) )
- goto gp_fault;
- break;
-- case MSR_IA32_MTRR_PHYSBASE(0)...MSR_IA32_MTRR_PHYSMASK(MTRR_VCNT-1):
-+ case MSR_IA32_MTRR_PHYSBASE(0)...MSR_IA32_MTRR_PHYSMASK(MTRR_VCNT_MAX - 1):
- if ( !d->arch.cpuid->basic.mtrr )
- goto gp_fault;
-- if ( !mtrr_var_range_msr_set(v->domain, &v->arch.hvm_vcpu.mtrr,
-+ index = msr - MSR_IA32_MTRR_PHYSBASE(0);
-+ if ( ((index / 2) >=
-+ MASK_EXTR(v->arch.hvm_vcpu.mtrr.mtrr_cap, MTRRcap_VCNT)) ||
-+ !mtrr_var_range_msr_set(v->domain, &v->arch.hvm_vcpu.mtrr,
- msr, msr_content) )
- goto gp_fault;
- break;
-diff --git a/xen/arch/x86/hvm/mtrr.c b/xen/arch/x86/hvm/mtrr.c
-index 7db0d473e8..4021d972fe 100644
---- a/xen/arch/x86/hvm/mtrr.c
-+++ b/xen/arch/x86/hvm/mtrr.c
-@@ -154,14 +154,26 @@ uint8_t pat_type_2_pte_flags(uint8_t pat_type)
- int hvm_vcpu_cacheattr_init(struct vcpu *v)
- {
- struct mtrr_state *m = &v->arch.hvm_vcpu.mtrr;
-+ unsigned int num_var_ranges =
-+ is_hardware_domain(v->domain) ? MASK_EXTR(mtrr_state.mtrr_cap,
-+ MTRRcap_VCNT)
-+ : MTRR_VCNT;
-+
-+ if ( num_var_ranges > MTRR_VCNT_MAX )
-+ {
-+ ASSERT(is_hardware_domain(v->domain));
-+ printk("WARNING: limited Dom%u variable range MTRRs from %u to %u\n",
-+ v->domain->domain_id, num_var_ranges, MTRR_VCNT_MAX);
-+ num_var_ranges = MTRR_VCNT_MAX;
-+ }
-
- memset(m, 0, sizeof(*m));
-
-- m->var_ranges = xzalloc_array(struct mtrr_var_range, MTRR_VCNT);
-+ m->var_ranges = xzalloc_array(struct mtrr_var_range, num_var_ranges);
- if ( m->var_ranges == NULL )
- return -ENOMEM;
-
-- m->mtrr_cap = (1u << 10) | (1u << 8) | MTRR_VCNT;
-+ m->mtrr_cap = (1u << 10) | (1u << 8) | num_var_ranges;
-
- v->arch.hvm_vcpu.pat_cr =
- ((uint64_t)PAT_TYPE_WRBACK) | /* PAT0: WB */
-@@ -448,6 +460,12 @@ bool_t mtrr_var_range_msr_set(
- uint64_t *var_range_base = (uint64_t*)m->var_ranges;
-
- index = msr - MSR_IA32_MTRR_PHYSBASE(0);
-+ if ( (index / 2) >= MASK_EXTR(m->mtrr_cap, MTRRcap_VCNT) )
-+ {
-+ ASSERT_UNREACHABLE();
-+ return 0;
-+ }
-+
- if ( var_range_base[index] == msr_content )
- return 1;
-
-@@ -691,6 +709,15 @@ static int hvm_save_mtrr_msr(struct domain *d, hvm_domain_context_t *h)
- };
- unsigned int i;
-
-+ if ( MASK_EXTR(hw_mtrr.msr_mtrr_cap, MTRRcap_VCNT) >
-+ (ARRAY_SIZE(hw_mtrr.msr_mtrr_var) / 2) )
-+ {
-+ dprintk(XENLOG_G_ERR,
-+ "HVM save: %pv: too many (%lu) variable range MTRRs\n",
-+ v, MASK_EXTR(hw_mtrr.msr_mtrr_cap, MTRRcap_VCNT));
-+ return -EINVAL;
-+ }
-+
- hvm_get_guest_pat(v, &hw_mtrr.msr_pat_cr);
-
- for ( i = 0; i < MASK_EXTR(hw_mtrr.msr_mtrr_cap, MTRRcap_VCNT); i++ )
-diff --git a/xen/include/asm-x86/mtrr.h b/xen/include/asm-x86/mtrr.h
-index b1f7af6396..72d0690e28 100644
---- a/xen/include/asm-x86/mtrr.h
-+++ b/xen/include/asm-x86/mtrr.h
-@@ -39,6 +39,9 @@ typedef u8 mtrr_type;
- #define MTRR_PHYSBASE_SHIFT 12
- /* Number of variable range MSR pairs we emulate for HVM guests: */
- #define MTRR_VCNT 8
-+/* Maximum number of variable range MSR pairs if FE is supported. */
-+#define MTRR_VCNT_MAX ((MSR_MTRRfix64K_00000 - \
-+ MSR_IA32_MTRR_PHYSBASE(0)) / 2)
-
- struct mtrr_var_range {
- uint64_t base;
---
-2.18.0
-
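A small illustration of the VCNT bounds checks added by the deleted patch above. The MSR numbers are the architectural ones; MASK_EXTR is again a simplified stand-in and mtrr_msr_ok() is a hypothetical helper written for this sketch.

    #include <stdint.h>
    #include <stdio.h>

    /* Simplified stand-in for Xen's MASK_EXTR. */
    #define MASK_EXTR(v, m) (((v) & (m)) / ((m) & -(m)))

    #define MTRRcap_VCNT            0xffull    /* low byte of MTRRcap */
    #define MSR_IA32_MTRR_PHYSBASE0 0x200u
    #define MSR_MTRRfix64K_00000    0x250u

    /* PHYSBASE/PHYSMASK pairs that fit below the fixed-range MSRs. */
    #define MTRR_VCNT_MAX \
        ((MSR_MTRRfix64K_00000 - MSR_IA32_MTRR_PHYSBASE0) / 2)

    /* Hypothetical helper: is this variable-range MSR within the
     * hardware-reported VCNT?  Each range owns two MSRs, hence / 2. */
    static int mtrr_msr_ok(uint64_t mtrr_cap, unsigned int msr)
    {
        unsigned int index = msr - MSR_IA32_MTRR_PHYSBASE0;

        return (index / 2) < MASK_EXTR(mtrr_cap, MTRRcap_VCNT);
    }

    int main(void)
    {
        uint64_t cap = 10;    /* hardware reports 10 variable ranges */

        printf("VCNT=%llu, MTRR_VCNT_MAX=%u\n",
               (unsigned long long)MASK_EXTR(cap, MTRRcap_VCNT),
               (unsigned int)MTRR_VCNT_MAX);
        printf("PHYSMASK(9)  accessible: %d\n", mtrr_msr_ok(cap, 0x213));
        printf("PHYSBASE(10) accessible: %d\n", mtrr_msr_ok(cap, 0x214));
        return 0;
    }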
diff --git a/emulators/xen-kernel/files/0005-hvm-mtrr-copy-hardware-state-for-Dom0.patch b/emulators/xen-kernel/files/0005-hvm-mtrr-copy-hardware-state-for-Dom0.patch
deleted file mode 100644
index ab3b8c55027f..000000000000
--- a/emulators/xen-kernel/files/0005-hvm-mtrr-copy-hardware-state-for-Dom0.patch
+++ /dev/null
@@ -1,59 +0,0 @@
-From d8b0840bb90711e93b6994e50c728bbbf0f012a0 Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com>
-Date: Mon, 16 Jul 2018 15:10:49 +0200
-Subject: [PATCH 5/7] hvm/mtrr: copy hardware state for Dom0
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-Copy the state found on the hardware when creating a PVH Dom0. Since
-the memory map provided to a PVH Dom0 is based on the native one, using
-the same set of MTRR ranges should provide Dom0 with a sane MTRR state
-without having to manually build it in Xen.
-
-Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
-Reviewed-by: Jan Beulich <jbeulich@suse.com>
----
- xen/arch/x86/hvm/mtrr.c | 26 ++++++++++++++++++++++++++
- 1 file changed, 26 insertions(+)
-
-diff --git a/xen/arch/x86/hvm/mtrr.c b/xen/arch/x86/hvm/mtrr.c
-index 4021d972fe..2b00993a7b 100644
---- a/xen/arch/x86/hvm/mtrr.c
-+++ b/xen/arch/x86/hvm/mtrr.c
-@@ -185,6 +185,32 @@ int hvm_vcpu_cacheattr_init(struct vcpu *v)
- ((uint64_t)PAT_TYPE_UC_MINUS << 48) | /* PAT6: UC- */
- ((uint64_t)PAT_TYPE_UNCACHABLE << 56); /* PAT7: UC */
-
-+ if ( is_hardware_domain(v->domain) )
-+ {
-+ /* Copy values from the host. */
-+ struct domain *d = v->domain;
-+ unsigned int i;
-+
-+ if ( mtrr_state.have_fixed )
-+ for ( i = 0; i < NUM_FIXED_MSR; i++ )
-+ mtrr_fix_range_msr_set(d, m, i,
-+ ((uint64_t *)mtrr_state.fixed_ranges)[i]);
-+
-+ for ( i = 0; i < num_var_ranges; i++ )
-+ {
-+ mtrr_var_range_msr_set(d, m, MSR_IA32_MTRR_PHYSBASE(i),
-+ mtrr_state.var_ranges[i].base);
-+ mtrr_var_range_msr_set(d, m, MSR_IA32_MTRR_PHYSMASK(i),
-+ mtrr_state.var_ranges[i].mask);
-+ }
-+
-+ mtrr_def_type_msr_set(d, m,
-+ mtrr_state.def_type |
-+ MASK_INSR(mtrr_state.fixed_enabled,
-+ MTRRdefType_FE) |
-+ MASK_INSR(mtrr_state.enabled, MTRRdefType_E));
-+ }
-+
- return 0;
- }
-
---
-2.18.0
-
diff --git a/emulators/xen-kernel/files/0006-libxc-pvh-set-default-MTRR-type-to-write-back.patch b/emulators/xen-kernel/files/0006-libxc-pvh-set-default-MTRR-type-to-write-back.patch
deleted file mode 100644
index c9a09ca1a24f..000000000000
--- a/emulators/xen-kernel/files/0006-libxc-pvh-set-default-MTRR-type-to-write-back.patch
+++ /dev/null
@@ -1,104 +0,0 @@
-From a1c1ae0b0f5b30b5b928e45349086ec00930bccf Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com>
-Date: Mon, 16 Jul 2018 15:11:22 +0200
-Subject: [PATCH 6/7] libxc/pvh: set default MTRR type to write-back
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-And enable MTRR. This allows providing a sane initial MTRR state for
-PVH DomUs. This will have to be expanded when pci-passthrough support
-is added to PVH guests, so that MMIO regions of devices are set as
-UC.
-
-Note that initial MTRR setup is done by hvmloader for HVM guests,
-which is not used by PVH guests.
-
-Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
-Acked-by: Wei Liu <wei.liu2@citrix.com>
----
- tools/libxc/xc_dom_x86.c | 44 ++++++++++++++++++++++++++++++++++++++++
- 1 file changed, 44 insertions(+)
-
-diff --git a/tools/libxc/xc_dom_x86.c b/tools/libxc/xc_dom_x86.c
-index e33a28847d..d28ff4d7e9 100644
---- a/tools/libxc/xc_dom_x86.c
-+++ b/tools/libxc/xc_dom_x86.c
-@@ -53,6 +53,9 @@
- #define X86_CR0_PE 0x01
- #define X86_CR0_ET 0x10
-
-+#define MTRR_TYPE_WRBACK 6
-+#define MTRR_DEF_TYPE_ENABLE (1u << 11)
-+
- #define SPECIALPAGE_PAGING 0
- #define SPECIALPAGE_ACCESS 1
- #define SPECIALPAGE_SHARING 2
-@@ -931,6 +934,20 @@ static int vcpu_x86_64(struct xc_dom_image *dom)
- return rc;
- }
-
-+const static void *hvm_get_save_record(const void *ctx, unsigned int type,
-+ unsigned int instance)
-+{
-+ const struct hvm_save_descriptor *header;
-+
-+ for ( header = ctx;
-+ header->typecode != HVM_SAVE_CODE(END);
-+ ctx += sizeof(*header) + header->length, header = ctx )
-+ if ( header->typecode == type && header->instance == instance )
-+ return ctx + sizeof(*header);
-+
-+ return NULL;
-+}
-+
- static int vcpu_hvm(struct xc_dom_image *dom)
- {
- struct {
-@@ -938,9 +955,12 @@ static int vcpu_hvm(struct xc_dom_image *dom)
- HVM_SAVE_TYPE(HEADER) header;
- struct hvm_save_descriptor cpu_d;
- HVM_SAVE_TYPE(CPU) cpu;
-+ struct hvm_save_descriptor mtrr_d;
-+ HVM_SAVE_TYPE(MTRR) mtrr;
- struct hvm_save_descriptor end_d;
- HVM_SAVE_TYPE(END) end;
- } bsp_ctx;
-+ const HVM_SAVE_TYPE(MTRR) *mtrr_record;
- uint8_t *full_ctx = NULL;
- int rc;
-
-@@ -1014,6 +1034,30 @@ static int vcpu_hvm(struct xc_dom_image *dom)
- if ( dom->start_info_seg.pfn )
- bsp_ctx.cpu.rbx = dom->start_info_seg.pfn << PAGE_SHIFT;
-
-+ /* Set the MTRR. */
-+ bsp_ctx.mtrr_d.typecode = HVM_SAVE_CODE(MTRR);
-+ bsp_ctx.mtrr_d.instance = 0;
-+ bsp_ctx.mtrr_d.length = HVM_SAVE_LENGTH(MTRR);
-+
-+ mtrr_record = hvm_get_save_record(full_ctx, HVM_SAVE_CODE(MTRR), 0);
-+ if ( !mtrr_record )
-+ {
-+ xc_dom_panic(dom->xch, XC_INTERNAL_ERROR,
-+ "%s: unable to get MTRR save record", __func__);
-+ goto out;
-+ }
-+
-+ memcpy(&bsp_ctx.mtrr, mtrr_record, sizeof(bsp_ctx.mtrr));
-+
-+ /* TODO: maybe this should be a firmware option instead? */
-+ if ( !dom->device_model )
-+ /*
-+ * Enable MTRR, set default type to WB.
-+ * TODO: add MMIO areas as UC when passthrough is supported.
-+ */
-+ bsp_ctx.mtrr.msr_mtrr_def_type = MTRR_TYPE_WRBACK |
-+ MTRR_DEF_TYPE_ENABLE;
-+
- /* Set the end descriptor. */
- bsp_ctx.end_d.typecode = HVM_SAVE_CODE(END);
- bsp_ctx.end_d.instance = 0;
---
-2.18.0
-
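The hvm_get_save_record() helper in the deleted patch walks the flat HVM save-record stream: a sequence of (descriptor, payload) pairs terminated by an END descriptor. A portable toy version follows; the type code and layout are assumed for illustration, with the real definitions living in Xen's public save-format headers.

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    /* Descriptor layout assumed for this sketch. */
    struct hvm_save_descriptor {
        uint16_t typecode;
        uint16_t instance;
        uint32_t length;
    };

    #define HVM_SAVE_CODE_END  0
    #define HVM_SAVE_CODE_MTRR 14   /* value assumed for this sketch */

    static const void *get_save_record(const void *ctx, unsigned int type,
                                       unsigned int instance)
    {
        const struct hvm_save_descriptor *header;

        /* Hop from descriptor to descriptor until END is reached. */
        for (header = ctx;
             header->typecode != HVM_SAVE_CODE_END;
             ctx = (const char *)ctx + sizeof(*header) + header->length,
             header = ctx)
            if (header->typecode == type && header->instance == instance)
                return (const char *)ctx + sizeof(*header);

        return NULL;
    }

    int main(void)
    {
        /* A minimal stream: one 8-byte MTRR record, then END. */
        union {
            struct hvm_save_descriptor align;
            unsigned char b[2 * sizeof(struct hvm_save_descriptor) + 8];
        } u;
        struct hvm_save_descriptor mtrr = { HVM_SAVE_CODE_MTRR, 0, 8 };
        struct hvm_save_descriptor end = { HVM_SAVE_CODE_END, 0, 0 };

        memcpy(u.b, &mtrr, sizeof(mtrr));
        memset(u.b + sizeof(mtrr), 0, 8);
        memcpy(u.b + sizeof(mtrr) + 8, &end, sizeof(end));

        printf("MTRR record %sfound\n",
               get_save_record(u.b, HVM_SAVE_CODE_MTRR, 0) ? "" : "not ");
        return 0;
    }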
diff --git a/emulators/xen-kernel/files/0007-docs-pvh-document-initial-MTRR-state.patch b/emulators/xen-kernel/files/0007-docs-pvh-document-initial-MTRR-state.patch
deleted file mode 100644
index 4be6edb9cdd7..000000000000
--- a/emulators/xen-kernel/files/0007-docs-pvh-document-initial-MTRR-state.patch
+++ /dev/null
@@ -1,44 +0,0 @@
-From 565efbc8a7145c47379543edfcc84fc4f4dd6d83 Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com>
-Date: Mon, 16 Jul 2018 15:11:42 +0200
-Subject: [PATCH 7/7] docs/pvh: document initial MTRR state
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-Provided to both Dom0 and DomUs.
-
-Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
----
- docs/misc/pvh.markdown | 18 ++++++++++++++++++
- 1 file changed, 18 insertions(+)
-
-diff --git a/docs/misc/pvh.markdown b/docs/misc/pvh.markdown
-index e85fb15374..1c9a00b48a 100644
---- a/docs/misc/pvh.markdown
-+++ b/docs/misc/pvh.markdown
-@@ -92,3 +92,21 @@ event channels. Delivery of those interrupts can be configured in the same way
- as HVM guests, check xen/include/public/hvm/params.h and
- xen/include/public/hvm/hvm\_op.h for more information about available delivery
- methods.
-+
-+## MTRR ##
-+
-+### Unprivileged guests ###
-+
-+PVH guests are currently booted with the default MTRR type set to write-back
-+and MTRR enabled. This allows DomUs to start with a sane MTRR state. Note that
-+this will have to be revisited when pci-passthrough is added to PVH in order to
-+set MMIO regions as UC.
-+
-+Xen guarantees that RAM regions will always have the WB cache type set in the
-+initial MTRR state, either set by the default MTRR type or by other means.
-+
-+### Hardware domain ###
-+
-+A PVH hardware domain is booted with the same MTRR state as the one found on
-+the host. This is done because the hardware domain memory map is already a
-+modified copy of the host memory map, so the same MTRR setup should work.
---
-2.18.0
-
diff --git a/emulators/xen-kernel/files/xen.4th b/emulators/xen-kernel/files/xen.4th
deleted file mode 100644
index a74d03c28e95..000000000000
--- a/emulators/xen-kernel/files/xen.4th
+++ /dev/null
@@ -1,99 +0,0 @@
-\ Copyright (c) 2015 Devin Teske <dteske@FreeBSD.org>
-\ All rights reserved.
-\
-\ Redistribution and use in source and binary forms, with or without
-\ modification, are permitted provided that the following conditions
-\ are met:
-\ 1. Redistributions of source code must retain the above copyright
-\ notice, this list of conditions and the following disclaimer.
-\ 2. Redistributions in binary form must reproduce the above copyright
-\ notice, this list of conditions and the following disclaimer in the
-\ documentation and/or other materials provided with the distribution.
-\
-\ THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
-\ ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-\ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-\ ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
-\ FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-\ DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
-\ OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
-\ HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
-\ LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
-\ OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
-\ SUCH DAMAGE.
-\
-\ $FreeBSD$
-
-create xenkerndefault 64 allot
-0 xenkerndefault c!
-
-also menu-command-helpers
-also menu-namespace
-
-: init_xen_active ( N -- N )
- s" [X]en Kernel.. off" 2 pick menu_caption[x] setenv
- s" [X]en Kernel.. On" 2 pick toggled_text[x] setenv
- s" toggle_xen" 2 pick menu_command[x] setenv
- s" 120" 2 pick menu_keycode[x] setenv
- s" ^[1mX^[men Kernel.. ^[34;1mOff^[m" 2 pick ansi_caption[x] setenv
- s" ^[1mX^[men Kernel.. ^[32;7mOn^[m" 2 pick toggled_ansi[x] setenv
-;
-
-: init_xen_inactive ( N -- N )
- s" Xen Kernel.. N/A" 2dup
- 4 pick menu_caption[x] setenv
- 2 pick ansi_caption[x] setenv
- s" true" 2 pick menu_command[x] setenv
-;
-
-: init_xen ( -- )
- s" optionsmenu_options" getenv 0> if
- c@ dup [char] 0 > over [char] 9 < and false = if
- drop [char] 0
- then
- 1+
- else
- [char] 1
- then
- begin
- dup [char] 8 > if
- false ( break )
- else
- dup s" optionsmenu_caption[x]" 20 +c! getenv -1 = if
- false ( break )
- else
- drop true
- then
- then
- while
- 1+
- repeat
-
- s" xen_kernel" getenv dup -1 <> over 0> and if
- xenkerndefault 1+ 0 2swap strcat swap 1- c!
- init_xen_active ( n -- n )
- toggle_menuitem ( n -- n )
- else
- drop
- xenkerndefault c@ 0<> if
- init_xen_active ( n -- n )
- else
- init_xen_inactive ( n -- n )
- then
- then
-;
-
-: toggle_xen ( N -- N TRUE )
- toggle_menuitem ( n -- n )
- menu-redraw
-
- dup toggle_stateN @ 0= if
- s" xen_kernel" unsetenv
- else
- xenkerndefault count s" xen_kernel" setenv
- then
-
- TRUE \ loop menu again
-;
-
-set optionsmenu_init="$optionsmenu_init init_xen"
diff --git a/emulators/xen-kernel/files/xsa284.patch b/emulators/xen-kernel/files/xsa284.patch
deleted file mode 100644
index 0b5dcd0f029c..000000000000
--- a/emulators/xen-kernel/files/xsa284.patch
+++ /dev/null
@@ -1,31 +0,0 @@
-From: Jan Beulich <jbeulich@suse.com>
-Subject: gnttab: set page refcount for copy-on-grant-transfer
-
-Commit 5cc77f9098 ("32-on-64: Fix domain address-size clamping,
-implement"), which introduced this functionality, took care of clearing
-the old page's PGC_allocated, but failed to set the bit (and install the
-associated reference) on the newly allocated one. Furthermore the "mfn"
-local variable was never updated, and hence the wrong MFN was passed to
-guest_physmap_add_page() (and back to the destination domain) in this
-case, leading to an IOMMU mapping into an unowned page.
-
-Ideally the code would use assign_pages(), but the call to
-gnttab_prepare_for_transfer() sits in the middle of the actions
-mirroring that function.
-
-This is XSA-284.
-
-Signed-off-by: Jan Beulich <jbeulich@suse.com>
-Acked-by: George Dunlap <george.dunlap@citrix.com>
-
---- a/xen/common/grant_table.c
-+++ b/xen/common/grant_table.c
-@@ -2183,6 +2183,8 @@ gnttab_transfer(
- page->count_info &= ~(PGC_count_mask|PGC_allocated);
- free_domheap_page(page);
- page = new_page;
-+ page->count_info = PGC_allocated | 1;
-+ mfn = page_to_mfn(page);
- }
-
- spin_lock(&e->page_alloc_lock);
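In miniature, the bug XSA-284 fixes is ordinary stale-state hygiene: after swapping in the freshly allocated page, both the refcount/PGC_allocated state and the cached mfn must be refreshed. A toy model with invented types (the bit position of PGC_allocated is illustrative):

    #include <stdio.h>

    /* Toy types; only the sequence of operations mirrors the fix. */
    struct page_info {
        unsigned long long count_info;
        unsigned long mfn;
    };

    #define PGC_allocated (1ull << 63)

    static unsigned long page_to_mfn(const struct page_info *pg)
    {
        return pg->mfn;
    }

    int main(void)
    {
        struct page_info old = { PGC_allocated | 1, 0x1000 };
        struct page_info new_page = { 0, 0x2000 };
        struct page_info *page = &old;
        unsigned long mfn = page_to_mfn(page);

        /* copy_domain_page(new, old); free_domheap_page(old); then the
         * transfer must adopt the new page completely: */
        page = &new_page;
        page->count_info = PGC_allocated | 1;   /* the missing refcount */
        mfn = page_to_mfn(page);                /* the stale-mfn fix */

        printf("transfer now maps mfn %#lx, not %#lx\n", mfn, old.mfn);
        return 0;
    }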
diff --git a/emulators/xen-kernel/files/xsa287-4.11.patch b/emulators/xen-kernel/files/xsa287-4.11.patch
deleted file mode 100644
index 8563560e151f..000000000000
--- a/emulators/xen-kernel/files/xsa287-4.11.patch
+++ /dev/null
@@ -1,328 +0,0 @@
-From 67620c1ccb13f7b58645f48248ba1f408b021fdc Mon Sep 17 00:00:00 2001
-From: George Dunlap <george.dunlap@citrix.com>
-Date: Fri, 18 Jan 2019 15:00:34 +0000
-Subject: [PATCH] steal_page: Get rid of bogus struct page states
-
-The original rules for `struct page` required the following invariants
-at all times:
-
-- refcount > 0 implies owner != NULL
-- PGC_allocated implies refcount > 0
-
-steal_page, in a misguided attempt to protect against unknown races,
-violates both of these rules, thus introducing other races:
-
-- Temporarily, the count_info has the refcount go to 0 while
- PGC_allocated is set
-
-- It explicitly returns the page with PGC_allocated set, but owner == NULL
- and page not on the page_list.
-
-The second one meant that page_get_owner_and_reference() could return
-NULL even after having successfully grabbed a reference on the page,
-leading the caller to leak the reference (since "couldn't get ref" and
-"got ref but no owner" look the same).
-
-Furthermore, rather than grabbing a page reference to ensure that the
-owner doesn't change under its feet, it appears to rely on holding
-d->page_alloc lock to prevent this.
-
-Unfortunately, this is ineffective: page->owner remains non-NULL for
-some time after the count has been set to 0, meaning that it would be
-entirely possible for the page to be freed and re-allocated to a
-different domain between the page_get_owner() check and the count_info
-check.
-
-Modify steal_page to instead follow the appropriate access discipline,
-taking the page through series of states similar to being freed and
-then re-allocated with MEMF_no_owner:
-
-- Grab an extra reference to make sure we don't race with anyone else
- freeing the page
-
-- Drop both references and PGC_allocated atomically, so that (if
-successful), anyone else trying to grab a reference will fail
-
-- Attempt to reset Xen's mappings
-
-- Reset the rest of the state.
-
-Then, modify the two callers appropriately:
-
-- Leave count_info alone (it's already been cleared)
-- Call free_domheap_page() directly if appropriate
-- Call assign_pages() rather than open-coding a partial assign
-
-With all callers to assign_pages() now passing in pages with the
-type_info field clear, tighten the respective assertion there.
-
-This is XSA-287.
-
-Signed-off-by: George Dunlap <george.dunlap@citrix.com>
-Signed-off-by: Jan Beulich <jbeulich@suse.com>
----
- xen/arch/x86/mm.c | 84 ++++++++++++++++++++++++++++------------
- xen/common/grant_table.c | 20 +++++-----
- xen/common/memory.c | 19 +++++----
- xen/common/page_alloc.c | 2 +-
- 4 files changed, 83 insertions(+), 42 deletions(-)
-
-diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
-index 6509035a5c..d8ff58c901 100644
---- a/xen/arch/x86/mm.c
-+++ b/xen/arch/x86/mm.c
-@@ -3966,70 +3966,106 @@ int donate_page(
- return -EINVAL;
- }
-
-+/*
-+ * Steal page will attempt to remove `page` from domain `d`. Upon
-+ * return, `page` will be in a state similar to the state of a page
-+ * returned from alloc_domheap_page() with MEMF_no_owner set:
-+ * - refcount 0
-+ * - type count cleared
-+ * - owner NULL
-+ * - page caching attributes cleaned up
-+ * - removed from the domain's page_list
-+ *
-+ * If MEMF_no_refcount is not set, the domain's tot_pages will be
-+ * adjusted. If this results in the page count falling to 0,
-+ * put_domain() will be called.
-+ *
-+ * The caller should either call free_domheap_page() to free the
-+ * page, or assign_pages() to put it back on some domain's page list.
-+ */
- int steal_page(
- struct domain *d, struct page_info *page, unsigned int memflags)
- {
- unsigned long x, y;
- bool drop_dom_ref = false;
-- const struct domain *owner = dom_xen;
-+ const struct domain *owner;
-+ int rc;
-
- if ( paging_mode_external(d) )
- return -EOPNOTSUPP;
-
-- spin_lock(&d->page_alloc_lock);
--
-- if ( is_xen_heap_page(page) || ((owner = page_get_owner(page)) != d) )
-+ /* Grab a reference to make sure the page doesn't change under our feet */
-+ rc = -EINVAL;
-+ if ( !(owner = page_get_owner_and_reference(page)) )
- goto fail;
-
-+ if ( owner != d || is_xen_heap_page(page) )
-+ goto fail_put;
-+
- /*
-- * We require there is just one reference (PGC_allocated). We temporarily
-- * drop this reference now so that we can safely swizzle the owner.
-+ * We require there are exactly two references -- the one we just
-+ * took, and PGC_allocated. We temporarily drop both these
-+ * references so that the page becomes effectively non-"live" for
-+ * the domain.
- */
- y = page->count_info;
- do {
- x = y;
-- if ( (x & (PGC_count_mask|PGC_allocated)) != (1 | PGC_allocated) )
-- goto fail;
-- y = cmpxchg(&page->count_info, x, x & ~PGC_count_mask);
-+ if ( (x & (PGC_count_mask|PGC_allocated)) != (2 | PGC_allocated) )
-+ goto fail_put;
-+ y = cmpxchg(&page->count_info, x, x & ~(PGC_count_mask|PGC_allocated));
- } while ( y != x );
-
- /*
-- * With the sole reference dropped temporarily, no-one can update type
-- * information. Type count also needs to be zero in this case, but e.g.
-- * PGT_seg_desc_page may still have PGT_validated set, which we need to
-- * clear before transferring ownership (as validation criteria vary
-- * depending on domain type).
-+ * NB this is safe even if the page ends up being given back to
-+ * the domain, because the count is zero: subsequent mappings will
-+ * cause the cache attributes to be re-instated inside
-+ * get_page_from_l1e().
-+ */
-+ if ( (rc = cleanup_page_cacheattr(page)) )
-+ {
-+ /*
-+ * Couldn't fixup Xen's mappings; put things the way we found
-+ * it and return an error
-+ */
-+ page->count_info |= PGC_allocated | 1;
-+ goto fail;
-+ }
-+
-+ /*
-+ * With the reference count now zero, nobody can grab references
-+ * to do anything else with the page. Return the page to a state
-+ * that it might be upon return from alloc_domheap_pages with
-+ * MEMF_no_owner set.
- */
-+ spin_lock(&d->page_alloc_lock);
-+
- BUG_ON(page->u.inuse.type_info & (PGT_count_mask | PGT_locked |
- PGT_pinned));
- page->u.inuse.type_info = 0;
--
-- /* Swizzle the owner then reinstate the PGC_allocated reference. */
- page_set_owner(page, NULL);
-- y = page->count_info;
-- do {
-- x = y;
-- BUG_ON((x & (PGC_count_mask|PGC_allocated)) != PGC_allocated);
-- } while ( (y = cmpxchg(&page->count_info, x, x | 1)) != x );
-+ page_list_del(page, &d->page_list);
-
- /* Unlink from original owner. */
- if ( !(memflags & MEMF_no_refcount) && !domain_adjust_tot_pages(d, -1) )
- drop_dom_ref = true;
-- page_list_del(page, &d->page_list);
-
- spin_unlock(&d->page_alloc_lock);
-+
- if ( unlikely(drop_dom_ref) )
- put_domain(d);
-+
- return 0;
-
-+ fail_put:
-+ put_page(page);
- fail:
-- spin_unlock(&d->page_alloc_lock);
- gdprintk(XENLOG_WARNING, "Bad steal mfn %" PRI_mfn
- " from d%d (owner d%d) caf=%08lx taf=%" PRtype_info "\n",
- mfn_x(page_to_mfn(page)), d->domain_id,
- owner ? owner->domain_id : DOMID_INVALID,
- page->count_info, page->u.inuse.type_info);
-- return -EINVAL;
-+ return rc;
- }
-
- static int __do_update_va_mapping(
-diff --git a/xen/common/grant_table.c b/xen/common/grant_table.c
-index c0585d33f4..656fad1b42 100644
---- a/xen/common/grant_table.c
-+++ b/xen/common/grant_table.c
-@@ -2179,7 +2179,7 @@ gnttab_transfer(
- rcu_unlock_domain(e);
- put_gfn_and_copyback:
- put_gfn(d, gop.mfn);
-- page->count_info &= ~(PGC_count_mask|PGC_allocated);
-+ /* The count_info has already been cleaned */
- free_domheap_page(page);
- goto copyback;
- }
-@@ -2202,10 +2202,9 @@ gnttab_transfer(
-
- copy_domain_page(page_to_mfn(new_page), mfn);
-
-- page->count_info &= ~(PGC_count_mask|PGC_allocated);
-+ /* The count_info has already been cleared */
- free_domheap_page(page);
- page = new_page;
-- page->count_info = PGC_allocated | 1;
- mfn = page_to_mfn(page);
- }
-
-@@ -2245,12 +2244,17 @@ gnttab_transfer(
- */
- spin_unlock(&e->page_alloc_lock);
- okay = gnttab_prepare_for_transfer(e, d, gop.ref);
-- spin_lock(&e->page_alloc_lock);
-
-- if ( unlikely(!okay) || unlikely(e->is_dying) )
-+ if ( unlikely(!okay || assign_pages(e, page, 0, MEMF_no_refcount)) )
- {
-- bool_t drop_dom_ref = !domain_adjust_tot_pages(e, -1);
-+ bool drop_dom_ref;
-
-+ /*
-+ * Need to grab this again to safely free our "reserved"
-+ * page in the page total
-+ */
-+ spin_lock(&e->page_alloc_lock);
-+ drop_dom_ref = !domain_adjust_tot_pages(e, -1);
- spin_unlock(&e->page_alloc_lock);
-
- if ( okay /* i.e. e->is_dying due to the surrounding if() */ )
-@@ -2263,10 +2267,6 @@ gnttab_transfer(
- goto unlock_and_copyback;
- }
-
-- page_list_add_tail(page, &e->page_list);
-- page_set_owner(page, e);
--
-- spin_unlock(&e->page_alloc_lock);
- put_gfn(d, gop.mfn);
-
- TRACE_1D(TRC_MEM_PAGE_GRANT_TRANSFER, e->domain_id);
-diff --git a/xen/common/memory.c b/xen/common/memory.c
-index 4fb7962c79..f71163221f 100644
---- a/xen/common/memory.c
-+++ b/xen/common/memory.c
-@@ -675,20 +675,22 @@ static long memory_exchange(XEN_GUEST_HANDLE_PARAM(xen_memory_exchange_t) arg)
- * Success! Beyond this point we cannot fail for this chunk.
- */
-
-- /* Destroy final reference to each input page. */
-+ /*
-+ * These pages have already had owner and reference cleared.
-+ * Do the final two steps: Remove from the physmap, and free
-+ * them.
-+ */
- while ( (page = page_list_remove_head(&in_chunk_list)) )
- {
- unsigned long gfn;
-
-- if ( !test_and_clear_bit(_PGC_allocated, &page->count_info) )
-- BUG();
- mfn = page_to_mfn(page);
- gfn = mfn_to_gmfn(d, mfn_x(mfn));
- /* Pages were unshared above */
- BUG_ON(SHARED_M2P(gfn));
- if ( guest_physmap_remove_page(d, _gfn(gfn), mfn, 0) )
- domain_crash(d);
-- put_page(page);
-+ free_domheap_page(page);
- }
-
- /* Assign each output page to the domain. */
-@@ -761,13 +763,16 @@ static long memory_exchange(XEN_GUEST_HANDLE_PARAM(xen_memory_exchange_t) arg)
- * chunks succeeded.
- */
- fail:
-- /* Reassign any input pages we managed to steal. */
-+ /*
-+ * Reassign any input pages we managed to steal. NB that if the assign
-+ * fails again, we're on the hook for freeing the page, since we've already
-+ * cleared PGC_allocated.
-+ */
- while ( (page = page_list_remove_head(&in_chunk_list)) )
- if ( assign_pages(d, page, 0, MEMF_no_refcount) )
- {
- BUG_ON(!d->is_dying);
-- if ( test_and_clear_bit(_PGC_allocated, &page->count_info) )
-- put_page(page);
-+ free_domheap_page(page);
- }
-
- dying:
-diff --git a/xen/common/page_alloc.c b/xen/common/page_alloc.c
-index 482f0988f7..52da7762e3 100644
---- a/xen/common/page_alloc.c
-+++ b/xen/common/page_alloc.c
-@@ -2221,7 +2221,7 @@ int assign_pages(
- for ( i = 0; i < (1 << order); i++ )
- {
- ASSERT(page_get_owner(&pg[i]) == NULL);
-- ASSERT((pg[i].count_info & ~(PGC_allocated | 1)) == 0);
-+ ASSERT(!pg[i].count_info);
- page_set_owner(&pg[i], d);
- smp_wmb(); /* Domain pointer must be visible before updating refcnt. */
- pg[i].count_info = PGC_allocated | 1;
---
-2.20.1
-
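The invariants quoted at the top of the XSA-287 patch can be modelled in a few lines. This single-threaded sketch only shows the state transitions; the real code performs the two-reference drop with cmpxchg(), and the bit positions below are invented for the example.

    #include <stdbool.h>
    #include <stdio.h>

    /* Bit positions invented for the example. */
    #define PGC_allocated  (1ull << 63)
    #define PGC_count_mask ((1ull << 54) - 1)

    static unsigned long long count_info;

    /* Invariant: PGC_allocated implies refcount > 0, so a page whose
     * refcount is zero can never hand out new references. */
    static bool get_ref(void)
    {
        if (!(count_info & PGC_count_mask))
            return false;
        count_info++;
        return true;
    }

    int main(void)
    {
        count_info = PGC_allocated | 1;   /* allocated, one reference */

        if (!get_ref())                   /* the extra "steal" reference */
            return 1;

        /* Expect exactly two refs plus PGC_allocated, and drop all of
         * it in one step so nobody else can re-reference the page. */
        if ((count_info & (PGC_count_mask | PGC_allocated)) ==
            (2 | PGC_allocated))
            count_info &= ~(PGC_count_mask | PGC_allocated);

        printf("count_info=%#llx: page ready for free or reassignment\n",
               count_info);
        return 0;
    }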
diff --git a/emulators/xen-kernel/files/xsa290-4.11-1.patch b/emulators/xen-kernel/files/xsa290-4.11-1.patch
deleted file mode 100644
index 38ddb56cf29c..000000000000
--- a/emulators/xen-kernel/files/xsa290-4.11-1.patch
+++ /dev/null
@@ -1,237 +0,0 @@
-From: Jan Beulich <jbeulich@suse.com>
-Subject: x86/mm: also allow L2 (un)validation to be preemptible
-
-Commit c612481d1c ("x86/mm: Plumbing to allow any PTE update to fail
-with -ERESTART") added assertions next to the {alloc,free}_l2_table()
-invocations to document (and validate in debug builds) that L2
-(un)validations are always preemptible.
-
-The assertion in free_page_type() was now observed to trigger when
-recursive L2 page tables get cleaned up.
-
-In particular put_page_from_l2e()'s assumption that _put_page_type()
-would always succeed is now wrong, resulting in a partially un-validated
-page left in a domain, which has no other means of getting cleaned up
-later on. Even if it caused no problems earlier, this would ultimately
-trigger the check for ->u.inuse.type_info having a zero count when
-freeing the page during cleanup after the domain has died.
-
-As a result it should be considered a mistake to not have extended
-preemption fully to L2 when it was added to L3/L4 table handling, which
-this change aims to correct.
-
-The validation side additions are done just for symmetry.
-
-This is part of XSA-290.
-
-Reported-by: Manuel Bouyer <bouyer@antioche.eu.org>
-Tested-by: Manuel Bouyer <bouyer@antioche.eu.org>
-Signed-off-by: Jan Beulich <jbeulich@suse.com>
-Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
-
---- a/xen/arch/x86/mm.c
-+++ b/xen/arch/x86/mm.c
-@@ -1126,7 +1126,7 @@ get_page_from_l1e(
- define_get_linear_pagetable(l2);
- static int
- get_page_from_l2e(
-- l2_pgentry_t l2e, unsigned long pfn, struct domain *d)
-+ l2_pgentry_t l2e, unsigned long pfn, struct domain *d, int partial)
- {
- unsigned long mfn = l2e_get_pfn(l2e);
- int rc;
-@@ -1141,7 +1141,8 @@ get_page_from_l2e(
- return -EINVAL;
- }
-
-- rc = get_page_and_type_from_mfn(_mfn(mfn), PGT_l1_page_table, d, 0, 0);
-+ rc = get_page_and_type_from_mfn(_mfn(mfn), PGT_l1_page_table, d,
-+ partial, false);
- if ( unlikely(rc == -EINVAL) && get_l2_linear_pagetable(l2e, pfn, d) )
- rc = 0;
-
-@@ -1295,8 +1296,11 @@ void put_page_from_l1e(l1_pgentry_t l1e,
- * NB. Virtual address 'l2e' maps to a machine address within frame 'pfn'.
- * Note also that this automatically deals correctly with linear p.t.'s.
- */
--static int put_page_from_l2e(l2_pgentry_t l2e, unsigned long pfn)
-+static int put_page_from_l2e(l2_pgentry_t l2e, unsigned long pfn,
-+ int partial, bool defer)
- {
-+ int rc = 0;
-+
- if ( !(l2e_get_flags(l2e) & _PAGE_PRESENT) || (l2e_get_pfn(l2e) == pfn) )
- return 1;
-
-@@ -1311,13 +1315,27 @@ static int put_page_from_l2e(l2_pgentry_
- else
- {
- struct page_info *pg = l2e_get_page(l2e);
-- int rc = _put_page_type(pg, false, mfn_to_page(_mfn(pfn)));
-+ struct page_info *ptpg = mfn_to_page(_mfn(pfn));
-
-- ASSERT(!rc);
-- put_page(pg);
-+ if ( unlikely(partial > 0) )
-+ {
-+ ASSERT(!defer);
-+ rc = _put_page_type(pg, true, ptpg);
-+ }
-+ else if ( defer )
-+ {
-+ current->arch.old_guest_ptpg = ptpg;
-+ current->arch.old_guest_table = pg;
-+ }
-+ else
-+ {
-+ rc = _put_page_type(pg, true, ptpg);
-+ if ( likely(!rc) )
-+ put_page(pg);
-+ }
- }
-
-- return 0;
-+ return rc;
- }
-
- static int put_page_from_l3e(l3_pgentry_t l3e, unsigned long pfn,
-@@ -1487,11 +1505,12 @@ static int alloc_l2_table(struct page_in
- unsigned long pfn = mfn_x(page_to_mfn(page));
- l2_pgentry_t *pl2e;
- unsigned int i;
-- int rc = 0;
-+ int rc = 0, partial = page->partial_pte;
-
- pl2e = map_domain_page(_mfn(pfn));
-
-- for ( i = page->nr_validated_ptes; i < L2_PAGETABLE_ENTRIES; i++ )
-+ for ( i = page->nr_validated_ptes; i < L2_PAGETABLE_ENTRIES;
-+ i++, partial = 0 )
- {
- if ( i > page->nr_validated_ptes && hypercall_preempt_check() )
- {
-@@ -1501,23 +1520,33 @@ static int alloc_l2_table(struct page_in
- }
-
- if ( !is_guest_l2_slot(d, type, i) ||
-- (rc = get_page_from_l2e(pl2e[i], pfn, d)) > 0 )
-+ (rc = get_page_from_l2e(pl2e[i], pfn, d, partial)) > 0 )
- continue;
-
-- if ( unlikely(rc == -ERESTART) )
-+ if ( rc == -ERESTART )
- {
- page->nr_validated_ptes = i;
-- break;
-+ page->partial_pte = partial ?: 1;
- }
--
-- if ( rc < 0 )
-+ else if ( rc == -EINTR && i )
-+ {
-+ page->nr_validated_ptes = i;
-+ page->partial_pte = 0;
-+ rc = -ERESTART;
-+ }
-+ else if ( rc < 0 && rc != -EINTR )
- {
- gdprintk(XENLOG_WARNING, "Failure in alloc_l2_table: slot %#x\n", i);
-- while ( i-- > 0 )
-- if ( is_guest_l2_slot(d, type, i) )
-- put_page_from_l2e(pl2e[i], pfn);
-- break;
-+ if ( i )
-+ {
-+ page->nr_validated_ptes = i;
-+ page->partial_pte = 0;
-+ current->arch.old_guest_ptpg = NULL;
-+ current->arch.old_guest_table = page;
-+ }
- }
-+ if ( rc < 0 )
-+ break;
-
- pl2e[i] = adjust_guest_l2e(pl2e[i], d);
- }
-@@ -1797,28 +1826,50 @@ static int free_l2_table(struct page_inf
- struct domain *d = page_get_owner(page);
- unsigned long pfn = mfn_x(page_to_mfn(page));
- l2_pgentry_t *pl2e;
-- unsigned int i = page->nr_validated_ptes - 1;
-- int err = 0;
-+ int rc = 0, partial = page->partial_pte;
-+ unsigned int i = page->nr_validated_ptes - !partial;
-
- pl2e = map_domain_page(_mfn(pfn));
-
-- ASSERT(page->nr_validated_ptes);
-- do {
-- if ( is_guest_l2_slot(d, page->u.inuse.type_info, i) &&
-- put_page_from_l2e(pl2e[i], pfn) == 0 &&
-- i && hypercall_preempt_check() )
-+ for ( ; ; )
-+ {
-+ if ( is_guest_l2_slot(d, page->u.inuse.type_info, i) )
-+ rc = put_page_from_l2e(pl2e[i], pfn, partial, false);
-+ if ( rc < 0 )
-+ break;
-+
-+ partial = 0;
-+
-+ if ( !i-- )
-+ break;
-+
-+ if ( hypercall_preempt_check() )
- {
-- page->nr_validated_ptes = i;
-- err = -ERESTART;
-+ rc = -EINTR;
-+ break;
- }
-- } while ( !err && i-- );
-+ }
-
- unmap_domain_page(pl2e);
-
-- if ( !err )
-+ if ( rc >= 0 )
-+ {
- page->u.inuse.type_info &= ~PGT_pae_xen_l2;
-+ rc = 0;
-+ }
-+ else if ( rc == -ERESTART )
-+ {
-+ page->nr_validated_ptes = i;
-+ page->partial_pte = partial ?: -1;
-+ }
-+ else if ( rc == -EINTR && i < L2_PAGETABLE_ENTRIES - 1 )
-+ {
-+ page->nr_validated_ptes = i + 1;
-+ page->partial_pte = 0;
-+ rc = -ERESTART;
-+ }
-
-- return err;
-+ return rc;
- }
-
- static int free_l3_table(struct page_info *page)
-@@ -2138,7 +2189,7 @@ static int mod_l2_entry(l2_pgentry_t *pl
- return -EBUSY;
- }
-
-- if ( unlikely((rc = get_page_from_l2e(nl2e, pfn, d)) < 0) )
-+ if ( unlikely((rc = get_page_from_l2e(nl2e, pfn, d, 0)) < 0) )
- return rc;
-
- nl2e = adjust_guest_l2e(nl2e, d);
-@@ -2157,7 +2208,8 @@ static int mod_l2_entry(l2_pgentry_t *pl
- return -EBUSY;
- }
-
-- put_page_from_l2e(ol2e, pfn);
-+ put_page_from_l2e(ol2e, pfn, 0, true);
-+
- return rc;
- }
-
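The continuation pattern that XSA-290 extends to L2 tables (chunk the work, record progress in nr_validated_ptes, and return -ERESTART so the hypercall is re-entered) can be sketched as follows. The structure, the preemption predicate and the errno value are illustrative stand-ins.

    #include <stdio.h>

    #define L2_ENTRIES 512
    #define ERESTART   85      /* errno value illustrative */

    struct page {
        unsigned int nr_validated_ptes;    /* resume point */
    };

    static int preempt_check(unsigned int i)
    {
        return i && !(i % 128);            /* pretend work is pending */
    }

    static int validate_l2(struct page *pg)
    {
        unsigned int i;

        for (i = pg->nr_validated_ptes; i < L2_ENTRIES; i++) {
            /* Only yield once at least one entry has been processed in
             * this pass, as the real loop does. */
            if (i > pg->nr_validated_ptes && preempt_check(i)) {
                pg->nr_validated_ptes = i;
                return -ERESTART;
            }
            /* ... validate entry i here ... */
        }
        return 0;
    }

    int main(void)
    {
        struct page pg = { 0 };
        int rc, passes = 1;

        while ((rc = validate_l2(&pg)) == -ERESTART)
            passes++;
        printf("completed in %d passes, rc=%d\n", passes, rc);
        return 0;
    }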
diff --git a/emulators/xen-kernel/files/xsa290-4.11-2.patch b/emulators/xen-kernel/files/xsa290-4.11-2.patch
deleted file mode 100644
index 9355e02f82ff..000000000000
--- a/emulators/xen-kernel/files/xsa290-4.11-2.patch
+++ /dev/null
@@ -1,71 +0,0 @@
-From: Jan Beulich <jbeulich@suse.com>
-Subject: x86/mm: add explicit preemption checks to L3 (un)validation
-
-When recursive page tables are used at the L3 level, unvalidation of a
-single L4 table may incur unvalidation of two levels of L3 tables, i.e.
-a maximum iteration count of 512^3 for unvalidating an L4 table. The
-preemption check in free_l2_table() as well as the one in
-_put_page_type() may never be reached, so explicit checking is needed in
-free_l3_table().
-
-When recursive page tables are used at the L4 level, the iteration count
-at L4 alone is capped at 512^2. As soon as a present L3 entry is hit
-which itself needs unvalidation (and hence requiring another nested loop
-with 512 iterations), the preemption checks added here kick in, so no
-further preemption checking is needed at L4 (until we decide to permit
-5-level paging for PV guests).
-
-The validation side additions are done just for symmetry.
-
-This is part of XSA-290.
-
-Signed-off-by: Jan Beulich <jbeulich@suse.com>
-Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
-
---- a/xen/arch/x86/mm.c
-+++ b/xen/arch/x86/mm.c
-@@ -1581,6 +1581,13 @@ static int alloc_l3_table(struct page_in
- for ( i = page->nr_validated_ptes; i < L3_PAGETABLE_ENTRIES;
- i++, partial = 0 )
- {
-+ if ( i > page->nr_validated_ptes && hypercall_preempt_check() )
-+ {
-+ page->nr_validated_ptes = i;
-+ rc = -ERESTART;
-+ break;
-+ }
-+
- if ( is_pv_32bit_domain(d) && (i == 3) )
- {
- if ( !(l3e_get_flags(pl3e[i]) & _PAGE_PRESENT) ||
-@@ -1882,15 +1889,25 @@ static int free_l3_table(struct page_inf
-
- pl3e = map_domain_page(_mfn(pfn));
-
-- do {
-+ for ( ; ; )
-+ {
- rc = put_page_from_l3e(pl3e[i], pfn, partial, 0);
- if ( rc < 0 )
- break;
-+
- partial = 0;
-- if ( rc > 0 )
-- continue;
-- pl3e[i] = unadjust_guest_l3e(pl3e[i], d);
-- } while ( i-- );
-+ if ( rc == 0 )
-+ pl3e[i] = unadjust_guest_l3e(pl3e[i], d);
-+
-+ if ( !i-- )
-+ break;
-+
-+ if ( hypercall_preempt_check() )
-+ {
-+ rc = -EINTR;
-+ break;
-+ }
-+ }
-
- unmap_domain_page(pl3e);
-
diff --git a/emulators/xen-kernel/files/xsa292.patch b/emulators/xen-kernel/files/xsa292.patch
deleted file mode 100644
index 198fd4fdd4d8..000000000000
--- a/emulators/xen-kernel/files/xsa292.patch
+++ /dev/null
@@ -1,95 +0,0 @@
-From: Jan Beulich <jbeulich@suse.com>
-Subject: x86/mm: properly flush TLB in switch_cr3_cr4()
-
-The CR3 values used for contexts run with PCID enabled uniformly have
-CR3.NOFLUSH set, resulting in the CR3 write itself to not cause any
-flushing at all. When the second CR4 write is skipped or doesn't do any
-flushing, there's nothing so far which would purge TLB entries which may
-have accumulated again if the PCID doesn't change; the "just in case"
-flush only affects the case where the PCID actually changes. (There may
-be particularly many TLB entries re-accumulated in case of a watchdog
-NMI kicking in during the critical time window.)
-
-Suppress the no-flush behavior of the CR3 write in this particular case.
-
-Similarly the second CR4 write may not cause any flushing of TLB entries
-established again while the original PCID was still in use - it may get
-performed because of unrelated bits changing. The flush of the old PCID
-needs to happen nevertheless.
-
-At the same time also eliminate a possible race with lazy context
-switch: Just like for CR4, CR3 may change at any time while interrupts
-are enabled, due to the __sync_local_execstate() invocation from the
-flush IPI handler. It is for that reason that the CR3 read, just like
-the CR4 one, must happen only after interrupts have been turned off.
-
-This is XSA-292.
-
-Reported-by: Sergey Dyasli <sergey.dyasli@citrix.com>
-Reported-by: Andrew Cooper <andrew.cooper3@citrix.com>
-Tested-by: Sergey Dyasli <sergey.dyasli@citrix.com>
-Signed-off-by: Jan Beulich <jbeulich@suse.com>
-Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
----
-v3: Adjust comments. Drop old_cr4 from the PGE check in the expression
- controlling the invocation of invpcid_flush_single_context(), as PGE
- is always clear there.
-v2: Decouple invpcid_flush_single_context() from 2nd CR4 write.
-
---- a/xen/arch/x86/flushtlb.c
-+++ b/xen/arch/x86/flushtlb.c
-@@ -103,9 +103,8 @@ static void do_tlb_flush(void)
-
- void switch_cr3_cr4(unsigned long cr3, unsigned long cr4)
- {
-- unsigned long flags, old_cr4;
-+ unsigned long flags, old_cr4, old_pcid;
- u32 t;
-- unsigned long old_pcid = cr3_pcid(read_cr3());
-
- /* This non-reentrant function is sometimes called in interrupt context. */
- local_irq_save(flags);
-@@ -133,15 +132,38 @@ void switch_cr3_cr4(unsigned long cr3, u
- */
- invpcid_flush_all_nonglobals();
-
-+ /*
-+ * If we don't change PCIDs, the CR3 write below needs to flush this very
-+ * PCID, even when a full flush was performed above, as we are currently
-+ * accumulating TLB entries again from the old address space.
-+ * NB: Clearing the bit when we don't use PCID is benign (as it is clear
-+ * already in that case), but allows the if() to be more simple.
-+ */
-+ old_pcid = cr3_pcid(read_cr3());
-+ if ( old_pcid == cr3_pcid(cr3) )
-+ cr3 &= ~X86_CR3_NOFLUSH;
-+
- write_cr3(cr3);
-
- if ( old_cr4 != cr4 )
- write_cr4(cr4);
-- else if ( old_pcid != cr3_pcid(cr3) )
-- /*
-- * Make sure no TLB entries related to the old PCID created between
-- * flushing the TLB and writing the new %cr3 value remain in the TLB.
-- */
-+
-+ /*
-+ * Make sure no TLB entries related to the old PCID created between
-+ * flushing the TLB and writing the new %cr3 value remain in the TLB.
-+ *
-+ * The write to CR4 just above has performed a wider flush in certain
-+ * cases, which therefore get excluded here. Since that write is
-+ * conditional, note in particular that it won't be skipped if PCIDE
-+ * transitions from 1 to 0. This is because the CR4 write further up will
-+ * have been skipped in this case, as PCIDE and PGE won't both be set at
-+ * the same time.
-+ *
-+ * Note also that PGE is always clear in old_cr4.
-+ */
-+ if ( old_pcid != cr3_pcid(cr3) &&
-+ !(cr4 & X86_CR4_PGE) &&
-+ (old_cr4 & X86_CR4_PCIDE) <= (cr4 & X86_CR4_PCIDE) )
- invpcid_flush_single_context(old_pcid);
-
- post_flush(t);
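The core of the XSA-292 fix is the interaction between the PCID field and the CR3 no-flush bit, which the following sketch isolates. The helper and the sample values are invented; only X86_CR3_NOFLUSH and the low-12-bit PCID field match the architecture.

    #include <stdint.h>
    #include <stdio.h>

    #define X86_CR3_NOFLUSH (1ull << 63)

    /* With PCIDE set, the PCID lives in the low 12 bits of CR3. */
    static uint64_t cr3_pcid(uint64_t cr3)
    {
        return cr3 & 0xfff;
    }

    int main(void)
    {
        /* Same PCID (4), different page-table base: exactly the case
         * in which a no-flush write would leave stale entries behind. */
        uint64_t old_cr3 = 0x1000 | 4 | X86_CR3_NOFLUSH;
        uint64_t new_cr3 = 0x2000 | 4 | X86_CR3_NOFLUSH;

        if (cr3_pcid(old_cr3) == cr3_pcid(new_cr3))
            new_cr3 &= ~X86_CR3_NOFLUSH;    /* make the write flush */

        printf("write_cr3(%#llx)%s\n", (unsigned long long)new_cr3,
               (new_cr3 & X86_CR3_NOFLUSH) ? "" : "  (flushing write)");
        return 0;
    }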
diff --git a/emulators/xen-kernel/files/xsa293-4.11-1.patch b/emulators/xen-kernel/files/xsa293-4.11-1.patch
deleted file mode 100644
index 0b8499fbbb15..000000000000
--- a/emulators/xen-kernel/files/xsa293-4.11-1.patch
+++ /dev/null
@@ -1,317 +0,0 @@
-From: Andrew Cooper <andrew.cooper3@citrix.com>
-Subject: x86/pv: Rewrite guest %cr4 handling from scratch
-
-The PV cr4 logic is almost impossible to follow, and leaks bits into guest
-context which definitely shouldn't be visible (in particular, VMXE).
-
-The biggest problem however, and source of the complexity, is that it derives
-new real and guest cr4 values from the current value in hardware - this is
-context dependent and an inappropriate source of information.
-
-Rewrite the cr4 logic to be invariant of the current value in hardware.
-
-First of all, modify write_ptbase() to always use mmu_cr4_features for IDLE
-and HVM contexts. mmu_cr4_features *is* the correct value to use, and makes
-the ASSERT() obviously redundant.
-
-For PV guests, curr->arch.pv.ctrlreg[4] remains the guest's view of cr4, but
-all logic gets reworked in terms of this and mmu_cr4_features only.
-
-Two masks are introduced; bits which the guest has control over, and bits
-which are forwarded from Xen's settings. One guest-visible change here is
-that Xen's VMXE setting is no longer visible at all.
-
-pv_make_cr4() follows fairly closely from pv_guest_cr4_to_real_cr4(), but
-deliberately starts with mmu_cr4_features, and only alters the minimal subset
-of bits.
-
-The boot-time {compat_,}pv_cr4_mask variables are removed, as they are a
-remnant of the pre-CPUID policy days. pv_fixup_guest_cr4() gains a related
-derivation from the policy.
-
-Another guest visible change here is that a 32bit PV guest can now flip
-FSGSBASE in its view of CR4. While the {RD,WR}{FS,GS}BASE instructions are
-unusable outside of a 64bit code segment, the ability to modify FSGSBASE
-matches real hardware behaviour, and avoids the need for any 32bit/64bit
-differences in the logic.
-
-Overall, this patch shouldn't cause any practical change in guest behaviour.
-VMXE will disappear from view, and an inquisitive 32bit kernel can now see
-FSGSBASE changing, but this new logic is otherwise bug-compatible with before.
-
-This is part of XSA-293
-
-Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
-Reviewed-by: Jan Beulich <jbeulich@suse.com>
-
-diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
-index b1e50d1..675152a 100644
---- a/xen/arch/x86/domain.c
-+++ b/xen/arch/x86/domain.c
-@@ -733,49 +733,6 @@ int arch_domain_soft_reset(struct domain *d)
- return ret;
- }
-
--/*
-- * These are the masks of CR4 bits (subject to hardware availability) which a
-- * PV guest may not legitimately attempt to modify.
-- */
--static unsigned long __read_mostly pv_cr4_mask, compat_pv_cr4_mask;
--
--static int __init init_pv_cr4_masks(void)
--{
-- unsigned long common_mask = ~X86_CR4_TSD;
--
-- /*
-- * All PV guests may attempt to modify TSD, DE and OSXSAVE.
-- */
-- if ( cpu_has_de )
-- common_mask &= ~X86_CR4_DE;
-- if ( cpu_has_xsave )
-- common_mask &= ~X86_CR4_OSXSAVE;
--
-- pv_cr4_mask = compat_pv_cr4_mask = common_mask;
--
-- /*
-- * 64bit PV guests may attempt to modify FSGSBASE.
-- */
-- if ( cpu_has_fsgsbase )
-- pv_cr4_mask &= ~X86_CR4_FSGSBASE;
--
-- return 0;
--}
--__initcall(init_pv_cr4_masks);
--
--unsigned long pv_guest_cr4_fixup(const struct vcpu *v, unsigned long guest_cr4)
--{
-- unsigned long hv_cr4 = real_cr4_to_pv_guest_cr4(read_cr4());
-- unsigned long mask = is_pv_32bit_vcpu(v) ? compat_pv_cr4_mask : pv_cr4_mask;
--
-- if ( (guest_cr4 & mask) != (hv_cr4 & mask) )
-- printk(XENLOG_G_WARNING
-- "d%d attempted to change %pv's CR4 flags %08lx -> %08lx\n",
-- current->domain->domain_id, v, hv_cr4, guest_cr4);
--
-- return (hv_cr4 & mask) | (guest_cr4 & ~mask);
--}
--
- #define xen_vcpu_guest_context vcpu_guest_context
- #define fpu_ctxt fpu_ctxt.x
- CHECK_FIELD_(struct, vcpu_guest_context, fpu_ctxt);
-@@ -789,7 +746,7 @@ int arch_set_info_guest(
- struct domain *d = v->domain;
- unsigned long cr3_gfn;
- struct page_info *cr3_page;
-- unsigned long flags, cr4;
-+ unsigned long flags;
- unsigned int i;
- int rc = 0, compat;
-
-@@ -978,9 +935,8 @@ int arch_set_info_guest(
- v->arch.pv_vcpu.ctrlreg[0] &= X86_CR0_TS;
- v->arch.pv_vcpu.ctrlreg[0] |= read_cr0() & ~X86_CR0_TS;
-
-- cr4 = v->arch.pv_vcpu.ctrlreg[4];
-- v->arch.pv_vcpu.ctrlreg[4] = cr4 ? pv_guest_cr4_fixup(v, cr4) :
-- real_cr4_to_pv_guest_cr4(mmu_cr4_features);
-+ v->arch.pv_vcpu.ctrlreg[4] =
-+ pv_fixup_guest_cr4(v, v->arch.pv_vcpu.ctrlreg[4]);
-
- memset(v->arch.debugreg, 0, sizeof(v->arch.debugreg));
- for ( i = 0; i < 8; i++ )
-diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
-index 6509035..08634b7 100644
---- a/xen/arch/x86/mm.c
-+++ b/xen/arch/x86/mm.c
-@@ -505,33 +505,13 @@ void make_cr3(struct vcpu *v, mfn_t mfn)
- v->arch.cr3 |= get_pcid_bits(v, false);
- }
-
--unsigned long pv_guest_cr4_to_real_cr4(const struct vcpu *v)
--{
-- const struct domain *d = v->domain;
-- unsigned long cr4;
--
-- cr4 = v->arch.pv_vcpu.ctrlreg[4] & ~X86_CR4_DE;
-- cr4 |= mmu_cr4_features & (X86_CR4_PSE | X86_CR4_SMEP | X86_CR4_SMAP |
-- X86_CR4_OSXSAVE | X86_CR4_FSGSBASE);
--
-- if ( d->arch.pv_domain.pcid )
-- cr4 |= X86_CR4_PCIDE;
-- else if ( !d->arch.pv_domain.xpti )
-- cr4 |= X86_CR4_PGE;
--
-- cr4 |= d->arch.vtsc ? X86_CR4_TSD : 0;
--
-- return cr4;
--}
--
- void write_ptbase(struct vcpu *v)
- {
- struct cpu_info *cpu_info = get_cpu_info();
- unsigned long new_cr4;
-
- new_cr4 = (is_pv_vcpu(v) && !is_idle_vcpu(v))
-- ? pv_guest_cr4_to_real_cr4(v)
-- : ((read_cr4() & ~(X86_CR4_PCIDE | X86_CR4_TSD)) | X86_CR4_PGE);
-+ ? pv_make_cr4(v) : mmu_cr4_features;
-
- if ( is_pv_vcpu(v) && v->domain->arch.pv_domain.xpti )
- {
-@@ -550,8 +530,6 @@ void write_ptbase(struct vcpu *v)
- switch_cr3_cr4(v->arch.cr3, new_cr4);
- cpu_info->pv_cr3 = 0;
- }
--
-- ASSERT(is_pv_vcpu(v) || read_cr4() == mmu_cr4_features);
- }
-
- /*
-diff --git a/xen/arch/x86/pv/domain.c b/xen/arch/x86/pv/domain.c
-index b75ff6b..3965959 100644
---- a/xen/arch/x86/pv/domain.c
-+++ b/xen/arch/x86/pv/domain.c
-@@ -97,6 +97,52 @@ static void release_compat_l4(struct vcpu *v)
- v->arch.guest_table_user = pagetable_null();
- }
-
-+unsigned long pv_fixup_guest_cr4(const struct vcpu *v, unsigned long cr4)
-+{
-+ const struct cpuid_policy *p = v->domain->arch.cpuid;
-+
-+ /* Discard attempts to set guest controllable bits outside of the policy. */
-+ cr4 &= ~((p->basic.tsc ? 0 : X86_CR4_TSD) |
-+ (p->basic.de ? 0 : X86_CR4_DE) |
-+ (p->feat.fsgsbase ? 0 : X86_CR4_FSGSBASE) |
-+ (p->basic.xsave ? 0 : X86_CR4_OSXSAVE));
-+
-+ /* Masks expected to be disjoint sets. */
-+ BUILD_BUG_ON(PV_CR4_GUEST_MASK & PV_CR4_GUEST_VISIBLE_MASK);
-+
-+ /*
-+ * A guest sees the policy subset of its own choice of guest controllable
-+ * bits, and a subset of Xen's choice of certain hardware settings.
-+ */
-+ return ((cr4 & PV_CR4_GUEST_MASK) |
-+ (mmu_cr4_features & PV_CR4_GUEST_VISIBLE_MASK));
-+}
-+
-+unsigned long pv_make_cr4(const struct vcpu *v)
-+{
-+ const struct domain *d = v->domain;
-+ unsigned long cr4 = mmu_cr4_features &
-+ ~(X86_CR4_PCIDE | X86_CR4_PGE | X86_CR4_TSD);
-+
-+ /*
-+ * PCIDE or PGE depends on the PCID/XPTI settings, but must not both be
-+ * set, as it impacts the safety of TLB flushing.
-+ */
-+ if ( d->arch.pv_domain.pcid )
-+ cr4 |= X86_CR4_PCIDE;
-+ else if ( !d->arch.pv_domain.xpti )
-+ cr4 |= X86_CR4_PGE;
-+
-+ /*
-+ * TSD is needed if either the guest has elected to use it, or Xen is
-+ * virtualising the TSC value the guest sees.
-+ */
-+ if ( d->arch.vtsc || (v->arch.pv_vcpu.ctrlreg[4] & X86_CR4_TSD) )
-+ cr4 |= X86_CR4_TSD;
-+
-+ return cr4;
-+}
-+
- int switch_compat(struct domain *d)
- {
- struct vcpu *v;
-@@ -191,7 +237,7 @@ int pv_vcpu_initialise(struct vcpu *v)
- /* PV guests by default have a 100Hz ticker. */
- v->periodic_period = MILLISECS(10);
-
-- v->arch.pv_vcpu.ctrlreg[4] = real_cr4_to_pv_guest_cr4(mmu_cr4_features);
-+ v->arch.pv_vcpu.ctrlreg[4] = pv_fixup_guest_cr4(v, 0);
-
- if ( is_pv_32bit_domain(d) )
- {
-diff --git a/xen/arch/x86/pv/emul-priv-op.c b/xen/arch/x86/pv/emul-priv-op.c
-index ce2ec76..4abbc14 100644
---- a/xen/arch/x86/pv/emul-priv-op.c
-+++ b/xen/arch/x86/pv/emul-priv-op.c
-@@ -32,6 +32,7 @@
- #include <asm/hypercall.h>
- #include <asm/mc146818rtc.h>
- #include <asm/p2m.h>
-+#include <asm/pv/domain.h>
- #include <asm/pv/traps.h>
- #include <asm/shared.h>
- #include <asm/traps.h>
-@@ -785,8 +786,8 @@ static int write_cr(unsigned int reg, unsigned long val,
- }
-
- case 4: /* Write CR4 */
-- curr->arch.pv_vcpu.ctrlreg[4] = pv_guest_cr4_fixup(curr, val);
-- write_cr4(pv_guest_cr4_to_real_cr4(curr));
-+ curr->arch.pv_vcpu.ctrlreg[4] = pv_fixup_guest_cr4(curr, val);
-+ write_cr4(pv_make_cr4(curr));
- ctxt_switch_levelling(curr);
- return X86EMUL_OKAY;
- }
-diff --git a/xen/include/asm-x86/domain.h b/xen/include/asm-x86/domain.h
-index ec81d78..c8aa8a5 100644
---- a/xen/include/asm-x86/domain.h
-+++ b/xen/include/asm-x86/domain.h
-@@ -610,17 +610,6 @@ bool update_secondary_system_time(struct vcpu *,
- void vcpu_show_execution_state(struct vcpu *);
- void vcpu_show_registers(const struct vcpu *);
-
--/* Clean up CR4 bits that are not under guest control. */
--unsigned long pv_guest_cr4_fixup(const struct vcpu *, unsigned long guest_cr4);
--
--/* Convert between guest-visible and real CR4 values. */
--unsigned long pv_guest_cr4_to_real_cr4(const struct vcpu *v);
--
--#define real_cr4_to_pv_guest_cr4(c) \
-- ((c) & ~(X86_CR4_PGE | X86_CR4_PSE | X86_CR4_TSD | \
-- X86_CR4_OSXSAVE | X86_CR4_SMEP | \
-- X86_CR4_FSGSBASE | X86_CR4_SMAP | X86_CR4_PCIDE))
--
- #define domain_max_vcpus(d) (is_hvm_domain(d) ? HVM_MAX_VCPUS : MAX_VIRT_CPUS)
-
- static inline struct vcpu_guest_context *alloc_vcpu_guest_context(void)
-diff --git a/xen/include/asm-x86/pv/domain.h b/xen/include/asm-x86/pv/domain.h
-index 4fea764..4e4710c 100644
---- a/xen/include/asm-x86/pv/domain.h
-+++ b/xen/include/asm-x86/pv/domain.h
-@@ -59,6 +59,23 @@ int pv_vcpu_initialise(struct vcpu *v);
- void pv_domain_destroy(struct domain *d);
- int pv_domain_initialise(struct domain *d);
-
-+/*
-+ * Bits which a PV guest can toggle in its view of cr4. Some are loaded into
-+ * hardware, while some are fully emulated.
-+ */
-+#define PV_CR4_GUEST_MASK \
-+ (X86_CR4_TSD | X86_CR4_DE | X86_CR4_FSGSBASE | X86_CR4_OSXSAVE)
-+
-+/* Bits which a PV guest may observe from the real hardware settings. */
-+#define PV_CR4_GUEST_VISIBLE_MASK \
-+ (X86_CR4_PAE | X86_CR4_MCE | X86_CR4_OSFXSR | X86_CR4_OSXMMEXCPT)
-+
-+/* Given a new cr4 value, construct the resulting guest-visible cr4 value. */
-+unsigned long pv_fixup_guest_cr4(const struct vcpu *v, unsigned long cr4);
-+
-+/* Create a cr4 value to load into hardware, based on vcpu settings. */
-+unsigned long pv_make_cr4(const struct vcpu *v);
-+
- #else /* !CONFIG_PV */
-
- #include <xen/errno.h>
-@@ -68,6 +85,8 @@ static inline int pv_vcpu_initialise(struct vcpu *v) { return -EOPNOTSUPP; }
- static inline void pv_domain_destroy(struct domain *d) {}
- static inline int pv_domain_initialise(struct domain *d) { return -EOPNOTSUPP; }
-
-+static inline unsigned long pv_make_cr4(const struct vcpu *v) { return ~0ul; }
-+
- #endif /* CONFIG_PV */
-
- void paravirt_ctxt_switch_from(struct vcpu *v);
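As an aside for readers of the deleted patch above: the two-mask derivation it introduces condenses to a few lines of C. The sketch below is a standalone illustration rather than Xen code; the CR4 bit positions are the architectural ones, but the fixup is simplified (no CPUID policy filtering) and fixup_guest_cr4() is a made-up name for the example.

    #include <stdio.h>

    #define X86_CR4_TSD        (1UL << 2)
    #define X86_CR4_DE         (1UL << 3)
    #define X86_CR4_PAE        (1UL << 5)
    #define X86_CR4_MCE        (1UL << 6)
    #define X86_CR4_OSFXSR     (1UL << 9)
    #define X86_CR4_OSXMMEXCPT (1UL << 10)
    #define X86_CR4_FSGSBASE   (1UL << 16)
    #define X86_CR4_OSXSAVE    (1UL << 18)

    /* Bits the guest may toggle, and bits mirrored from Xen's settings. */
    #define PV_CR4_GUEST_MASK \
        (X86_CR4_TSD | X86_CR4_DE | X86_CR4_FSGSBASE | X86_CR4_OSXSAVE)
    #define PV_CR4_GUEST_VISIBLE_MASK \
        (X86_CR4_PAE | X86_CR4_MCE | X86_CR4_OSFXSR | X86_CR4_OSXMMEXCPT)

    static unsigned long fixup_guest_cr4(unsigned long guest_cr4,
                                         unsigned long xen_cr4)
    {
        /* Guest choice within its mask; Xen's hardware state elsewhere. */
        return (guest_cr4 & PV_CR4_GUEST_MASK) |
               (xen_cr4 & PV_CR4_GUEST_VISIBLE_MASK);
    }

    int main(void)
    {
        /*
         * The guest's attempt to set PAE is discarded; PAE shows up only
         * because Xen's own setting has it, while OSXSAVE is honoured.
         */
        printf("%#lx\n", fixup_guest_cr4(X86_CR4_PAE | X86_CR4_OSXSAVE,
                                         X86_CR4_PAE | X86_CR4_MCE));
        return 0;
    }

Keeping the two masks disjoint (enforced in Xen by the BUILD_BUG_ON above) means no bit can be both guest-chosen and hardware-mirrored, so the OR can never silently override a guest's choice.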
diff --git a/emulators/xen-kernel/files/xsa293-4.11-2.patch b/emulators/xen-kernel/files/xsa293-4.11-2.patch
deleted file mode 100644
index a005021b1483..000000000000
--- a/emulators/xen-kernel/files/xsa293-4.11-2.patch
+++ /dev/null
@@ -1,260 +0,0 @@
-From: Andrew Cooper <andrew.cooper3@citrix.com>
-Subject: x86/pv: Don't have %cr4.fsgsbase active behind a guest kernel's back
-
-Currently, a 64bit PV guest can appear to set and clear FSGSBASE in %cr4, but
-the bit remains set in hardware. Hence the {RD,WR}{FS,GS}BASE instructions
-are usable even when the guest kernel believes that they are disabled.
-
-The FSGSBASE feature isn't currently supported in Linux, and its context
-switch path has some optimisations which rely on userspace being unable to use
-the WR{FS,GS}BASE instructions. Xen's current behaviour undermines this
-expectation.
-
-In 64bit PV guest context, always load the guest kernel's setting of FSGSBASE
-into %cr4. This requires adjusting how Xen uses the {RD,WR}{FS,GS}BASE
-instructions.
-
- * Delete the cpu_has_fsgsbase helper. It is no longer safe, as users need to
- check %cr4 directly.
- * The raw __rd{fs,gs}base() helpers are only safe to use when %cr4.fsgsbase
- is set. Comment this property.
- * The {rd,wr}{fs,gs}{base,shadow}() and read_msr() helpers are updated to use
- the current %cr4 value to determine which mechanism to use.
- * toggle_guest_mode() and save_segments() are updated to avoid reading
- fs/gsbase if the values in hardware cannot be stale WRT struct vcpu. A
- consequence of this is that the write_cr() path needs to cache the current
- bases, as subsequent context switches will skip saving the values.
- * write_cr4() is updated to ensure that the shadow %cr4.fsgsbase value is
- observed in a safe way WRT the hardware setting, if an interrupt happens to
- hit in the middle.
- * pv_make_cr4() is updated for 64bit PV guests to use the guest kernel's
- choice of FSGSBASE.
-
-This is part of XSA-293
-
-Reported-by: Andy Lutomirski <luto@kernel.org>
-Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
-Reviewed-by: Jan Beulich <jbeulich@suse.com>
-
-diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
-index 675152a..29f892c 100644
---- a/xen/arch/x86/domain.c
-+++ b/xen/arch/x86/domain.c
-@@ -1433,7 +1433,8 @@ static void save_segments(struct vcpu *v)
- regs->fs = read_sreg(fs);
- regs->gs = read_sreg(gs);
-
-- if ( cpu_has_fsgsbase && !is_pv_32bit_vcpu(v) )
-+ /* %fs/%gs bases can only be stale if WR{FS,GS}BASE are usable. */
-+ if ( (read_cr4() & X86_CR4_FSGSBASE) && !is_pv_32bit_vcpu(v) )
- {
- v->arch.pv_vcpu.fs_base = __rdfsbase();
- if ( v->arch.flags & TF_kernel_mode )
-diff --git a/xen/arch/x86/pv/domain.c b/xen/arch/x86/pv/domain.c
-index 3965959..228a174 100644
---- a/xen/arch/x86/pv/domain.c
-+++ b/xen/arch/x86/pv/domain.c
-@@ -140,6 +140,16 @@ unsigned long pv_make_cr4(const struct vcpu *v)
- if ( d->arch.vtsc || (v->arch.pv_vcpu.ctrlreg[4] & X86_CR4_TSD) )
- cr4 |= X86_CR4_TSD;
-
-+ /*
-+ * The {RD,WR}{FS,GS}BASE are only usable in 64bit code segments. While
-+ * we must not have CR4.FSGSBASE set behind the back of a 64bit PV kernel,
-+ * we do leave it set in 32bit PV context to speed up Xen's context switch
-+ * path.
-+ */
-+ if ( !is_pv_32bit_domain(d) &&
-+ !(v->arch.pv_vcpu.ctrlreg[4] & X86_CR4_FSGSBASE) )
-+ cr4 &= ~X86_CR4_FSGSBASE;
-+
- return cr4;
- }
-
-@@ -375,7 +385,8 @@ void toggle_guest_mode(struct vcpu *v)
- {
- ASSERT(!is_pv_32bit_vcpu(v));
-
-- if ( cpu_has_fsgsbase )
-+ /* %fs/%gs bases can only be stale if WR{FS,GS}BASE are usable. */
-+ if ( read_cr4() & X86_CR4_FSGSBASE )
- {
- if ( v->arch.flags & TF_kernel_mode )
- v->arch.pv_vcpu.gs_base_kernel = __rdgsbase();
-diff --git a/xen/arch/x86/pv/emul-priv-op.c b/xen/arch/x86/pv/emul-priv-op.c
-index 4abbc14..312c1ee 100644
---- a/xen/arch/x86/pv/emul-priv-op.c
-+++ b/xen/arch/x86/pv/emul-priv-op.c
-@@ -786,6 +786,17 @@ static int write_cr(unsigned int reg, unsigned long val,
- }
-
- case 4: /* Write CR4 */
-+ /*
-+ * If this write will disable FSGSBASE, refresh Xen's idea of the
-+ * guest bases now that they can no longer change.
-+ */
-+ if ( (curr->arch.pv_vcpu.ctrlreg[4] & X86_CR4_FSGSBASE) &&
-+ !(val & X86_CR4_FSGSBASE) )
-+ {
-+ curr->arch.pv_vcpu.fs_base = __rdfsbase();
-+ curr->arch.pv_vcpu.gs_base_kernel = __rdgsbase();
-+ }
-+
- curr->arch.pv_vcpu.ctrlreg[4] = pv_fixup_guest_cr4(curr, val);
- write_cr4(pv_make_cr4(curr));
- ctxt_switch_levelling(curr);
-@@ -835,14 +846,15 @@ static int read_msr(unsigned int reg, uint64_t *val,
- case MSR_FS_BASE:
- if ( is_pv_32bit_domain(currd) )
- break;
-- *val = cpu_has_fsgsbase ? __rdfsbase() : curr->arch.pv_vcpu.fs_base;
-+ *val = (read_cr4() & X86_CR4_FSGSBASE) ? __rdfsbase()
-+ : curr->arch.pv_vcpu.fs_base;
- return X86EMUL_OKAY;
-
- case MSR_GS_BASE:
- if ( is_pv_32bit_domain(currd) )
- break;
-- *val = cpu_has_fsgsbase ? __rdgsbase()
-- : curr->arch.pv_vcpu.gs_base_kernel;
-+ *val = (read_cr4() & X86_CR4_FSGSBASE) ? __rdgsbase()
-+ : curr->arch.pv_vcpu.gs_base_kernel;
- return X86EMUL_OKAY;
-
- case MSR_SHADOW_GS_BASE:
-diff --git a/xen/arch/x86/setup.c b/xen/arch/x86/setup.c
-index ecb0149..a353d76 100644
---- a/xen/arch/x86/setup.c
-+++ b/xen/arch/x86/setup.c
-@@ -1567,7 +1567,7 @@ void __init noreturn __start_xen(unsigned long mbi_p)
-
- cr4_pv32_mask = mmu_cr4_features & XEN_CR4_PV32_BITS;
-
-- if ( cpu_has_fsgsbase )
-+ if ( boot_cpu_has(X86_FEATURE_FSGSBASE) )
- set_in_cr4(X86_CR4_FSGSBASE);
-
- if ( opt_invpcid && cpu_has_invpcid )
-diff --git a/xen/include/asm-x86/cpufeature.h b/xen/include/asm-x86/cpufeature.h
-index b237da1..861cb0a 100644
---- a/xen/include/asm-x86/cpufeature.h
-+++ b/xen/include/asm-x86/cpufeature.h
-@@ -90,7 +90,6 @@
- #define cpu_has_xsaves boot_cpu_has(X86_FEATURE_XSAVES)
-
- /* CPUID level 0x00000007:0.ebx */
--#define cpu_has_fsgsbase boot_cpu_has(X86_FEATURE_FSGSBASE)
- #define cpu_has_bmi1 boot_cpu_has(X86_FEATURE_BMI1)
- #define cpu_has_hle boot_cpu_has(X86_FEATURE_HLE)
- #define cpu_has_avx2 boot_cpu_has(X86_FEATURE_AVX2)
-diff --git a/xen/include/asm-x86/msr.h b/xen/include/asm-x86/msr.h
-index afbeb7f..1ba6ee3 100644
---- a/xen/include/asm-x86/msr.h
-+++ b/xen/include/asm-x86/msr.h
-@@ -120,6 +120,14 @@ static inline uint64_t rdtsc_ordered(void)
- : "=a" (low), "=d" (high) \
- : "c" (counter))
-
-+/*
-+ * On hardware supporting FSGSBASE, the value loaded into hardware is the
-+ * guest kernel's choice for 64bit PV guests (Xen's choice for Idle, HVM and
-+ * 32bit PV).
-+ *
-+ * Therefore, the {RD,WR}{FS,GS}BASE instructions are only safe to use if
-+ * %cr4.fsgsbase is set.
-+ */
- static inline unsigned long __rdfsbase(void)
- {
- unsigned long base;
-@@ -150,7 +158,7 @@ static inline unsigned long rdfsbase(void)
- {
- unsigned long base;
-
-- if ( cpu_has_fsgsbase )
-+ if ( read_cr4() & X86_CR4_FSGSBASE )
- return __rdfsbase();
-
- rdmsrl(MSR_FS_BASE, base);
-@@ -162,7 +170,7 @@ static inline unsigned long rdgsbase(void)
- {
- unsigned long base;
-
-- if ( cpu_has_fsgsbase )
-+ if ( read_cr4() & X86_CR4_FSGSBASE )
- return __rdgsbase();
-
- rdmsrl(MSR_GS_BASE, base);
-@@ -174,7 +182,7 @@ static inline unsigned long rdgsshadow(void)
- {
- unsigned long base;
-
-- if ( cpu_has_fsgsbase )
-+ if ( read_cr4() & X86_CR4_FSGSBASE )
- {
- asm volatile ( "swapgs" );
- base = __rdgsbase();
-@@ -188,7 +196,7 @@ static inline unsigned long rdgsshadow(void)
-
- static inline void wrfsbase(unsigned long base)
- {
-- if ( cpu_has_fsgsbase )
-+ if ( read_cr4() & X86_CR4_FSGSBASE )
- #ifdef HAVE_AS_FSGSBASE
- asm volatile ( "wrfsbase %0" :: "r" (base) );
- #else
-@@ -200,7 +208,7 @@ static inline void wrfsbase(unsigned long base)
-
- static inline void wrgsbase(unsigned long base)
- {
-- if ( cpu_has_fsgsbase )
-+ if ( read_cr4() & X86_CR4_FSGSBASE )
- #ifdef HAVE_AS_FSGSBASE
- asm volatile ( "wrgsbase %0" :: "r" (base) );
- #else
-@@ -212,7 +220,7 @@ static inline void wrgsbase(unsigned long base)
-
- static inline void wrgsshadow(unsigned long base)
- {
-- if ( cpu_has_fsgsbase )
-+ if ( read_cr4() & X86_CR4_FSGSBASE )
- {
- asm volatile ( "swapgs\n\t"
- #ifdef HAVE_AS_FSGSBASE
-diff --git a/xen/include/asm-x86/processor.h b/xen/include/asm-x86/processor.h
-index 2bd9e69..8e253dc 100644
---- a/xen/include/asm-x86/processor.h
-+++ b/xen/include/asm-x86/processor.h
-@@ -305,11 +305,31 @@ static inline unsigned long read_cr4(void)
-
- static inline void write_cr4(unsigned long val)
- {
-+ struct cpu_info *info = get_cpu_info();
-+
- /* No global pages in case of PCIDs enabled! */
- ASSERT(!(val & X86_CR4_PGE) || !(val & X86_CR4_PCIDE));
-
-- get_cpu_info()->cr4 = val;
-- asm volatile ( "mov %0,%%cr4" : : "r" (val) );
-+ /*
-+ * On hardware supporting FSGSBASE, the value in %cr4 is the kernel's
-+ * choice for 64bit PV guests, which impacts whether Xen can use the
-+ * instructions.
-+ *
-+ * The {rd,wr}{fs,gs}base() helpers use info->cr4 to work out whether it
-+ * is safe to execute the {RD,WR}{FS,GS}BASE instruction, falling back to
-+ * the MSR path if not. Some users require interrupt safety.
-+ *
-+ * If FSGSBASE is currently or about to become clear, reflect this in
-+ * info->cr4 before updating %cr4, so an interrupt which hits in the
-+ * middle won't observe FSGSBASE set in info->cr4 but clear in %cr4.
-+ */
-+ info->cr4 = val & (info->cr4 | ~X86_CR4_FSGSBASE);
-+
-+ asm volatile ( "mov %[val], %%cr4"
-+ : "+m" (info->cr4) /* Force ordering without a barrier. */
-+ : [val] "r" (val) );
-+
-+ info->cr4 = val;
- }
-
- /* Clear and set 'TS' bit respectively */
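The subtle point in the write_cr4() hunk above is ordering: the per-CPU shadow of %cr4 must never report FSGSBASE as set while the hardware bit is clear, or an interrupt arriving mid-update could take the RDFSBASE path and fault. A compact model of that ordering follows, with plain variables standing in for get_cpu_info()->cr4 and for the mov to %cr4 (illustrative only, not the Xen implementation):

    #include <stdio.h>

    #define X86_CR4_FSGSBASE (1UL << 16)

    /* shadow_cr4 stands in for get_cpu_info()->cr4; hw_cr4 for %cr4. */
    static volatile unsigned long shadow_cr4 = X86_CR4_FSGSBASE;
    static volatile unsigned long hw_cr4 = X86_CR4_FSGSBASE;

    static void write_cr4_model(unsigned long val)
    {
        /*
         * If val clears FSGSBASE, drop it from the shadow before touching
         * "hardware": an interrupt landing between these statements must
         * never see the shadow claim FSGSBASE while %cr4 lacks it.
         */
        shadow_cr4 = val & (shadow_cr4 | ~X86_CR4_FSGSBASE);

        hw_cr4 = val;          /* stands in for: mov val, %cr4 */

        shadow_cr4 = val;      /* publish any 0 -> 1 transition last */
    }

    int main(void)
    {
        write_cr4_model(0);    /* disable: the shadow drops the bit first */
        printf("shadow=%#lx hw=%#lx\n", shadow_cr4, hw_cr4);
        return 0;
    }

The masking expression leaves FSGSBASE alone when val keeps it set, but clears it from the shadow eagerly when val drops it; the final store then publishes a set bit only after "hardware" already has it.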
diff --git a/emulators/xen-kernel/files/xsa294-4.11.patch b/emulators/xen-kernel/files/xsa294-4.11.patch
deleted file mode 100644
index a0784d928f35..000000000000
--- a/emulators/xen-kernel/files/xsa294-4.11.patch
+++ /dev/null
@@ -1,71 +0,0 @@
-From: Jan Beulich <JBeulich@suse.com>
-Subject: x86/pv: _toggle_guest_pt() may not skip TLB flush for shadow mode guests
-
-For shadow mode guests (e.g. PV ones forced into that mode as L1TF
-mitigation, or during migration) update_cr3() -> sh_update_cr3() may
-result in a change to the (shadow) root page table (compared to the
-previous one when running the same vCPU with the same PCID). This can,
-first and foremost, be a result of memory pressure on the shadow memory
-pool of the domain. Shadow code legitimately relies on the original
-(prior to commit 5c81d260c2 ["xen/x86: use PCID feature"]) behavior of
-the subsequent CR3 write to flush the TLB of entries still left from
-walks with an earlier, different (shadow) root page table.
-
-Restore the flushing behavior, also for the second CR3 write on the exit
-path to guest context when XPTI is active. For the moment accept that
-this will introduce more flushes than are strictly necessary - no flush
-would be needed when the (shadow) root page table doesn't actually
-change, but this information isn't readily (i.e. without introducing a
-layering violation) available here.
-
-This is XSA-294.
-
-Reported-by: XXX PERSON <XXX EMAIL>
-Signed-off-by: Jan Beulich <jbeulich@suse.com>
-Tested-by: Juergen Gross <jgross@suse.com>
-Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
-
-diff --git a/xen/arch/x86/pv/domain.c b/xen/arch/x86/pv/domain.c
-index b75ff6b..528413a 100644
---- a/xen/arch/x86/pv/domain.c
-+++ b/xen/arch/x86/pv/domain.c
-@@ -296,21 +296,35 @@ int pv_domain_initialise(struct domain *d)
- static void _toggle_guest_pt(struct vcpu *v)
- {
- const struct domain *d = v->domain;
-+ struct cpu_info *cpu_info = get_cpu_info();
-+ unsigned long cr3;
-
- v->arch.flags ^= TF_kernel_mode;
- update_cr3(v);
- if ( d->arch.pv_domain.xpti )
- {
-- struct cpu_info *cpu_info = get_cpu_info();
--
- cpu_info->root_pgt_changed = true;
- cpu_info->pv_cr3 = __pa(this_cpu(root_pgt)) |
- (d->arch.pv_domain.pcid
- ? get_pcid_bits(v, true) : 0);
- }
-
-- /* Don't flush user global mappings from the TLB. Don't tick TLB clock. */
-- write_cr3(v->arch.cr3);
-+ /*
-+ * Don't flush user global mappings from the TLB. Don't tick TLB clock.
-+ *
-+ * In shadow mode, though, update_cr3() may need to be accompanied by a
-+ * TLB flush (for just the incoming PCID), as the top level page table may
-+ * have changed behind our backs. To be on the safe side, suppress the
-+ * no-flush unconditionally in this case. The XPTI CR3 write, if enabled,
-+ * will then need to be a flushing one too.
-+ */
-+ cr3 = v->arch.cr3;
-+ if ( shadow_mode_enabled(d) )
-+ {
-+ cr3 &= ~X86_CR3_NOFLUSH;
-+ cpu_info->pv_cr3 &= ~X86_CR3_NOFLUSH;
-+ }
-+ write_cr3(cr3);
-
- if ( !(v->arch.flags & TF_kernel_mode) )
- return;
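Background for the XSA-294 fix above: with CR4.PCIDE enabled, bit 63 of the value written to %cr3 is the architectural no-flush hint, so forcing a flush costs exactly one cleared bit. A minimal sketch of the decision the patch adds (the constant matches the SDM definition; the shadow-mode flag is a plain parameter here rather than a domain lookup):

    #include <stdio.h>

    #define X86_CR3_NOFLUSH (1UL << 63)

    /* Sketch: pick the CR3 value to load for a vCPU about to run. */
    static unsigned long pick_cr3(unsigned long cr3, int shadow_mode)
    {
        /*
         * Shadow paging can hand back a different root page table for
         * the same PCID, so stale entries must go: clear the no-flush
         * hint and let the CR3 write flush the incoming PCID.
         */
        if (shadow_mode)
            cr3 &= ~X86_CR3_NOFLUSH;
        return cr3;
    }

    int main(void)
    {
        unsigned long cr3 = 0x1000 | X86_CR3_NOFLUSH | 1; /* PCID 1 */
        printf("%#lx\n", pick_cr3(cr3, 1)); /* hint stripped -> flush */
        return 0;
    }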
diff --git a/emulators/xen-kernel/pkg-message b/emulators/xen-kernel/pkg-message
index 9ba82df14698..9105ffc7bd43 100644
--- a/emulators/xen-kernel/pkg-message
+++ b/emulators/xen-kernel/pkg-message
@@ -7,12 +7,8 @@ In /etc/ttys:
xc0 "/usr/libexec/getty Pc" xterm on secure
In /boot/loader.conf for a dom0 with 2G memory and 4 vcpus:
- hw.pci.mcfg=0
xen_kernel="/boot/xen"
xen_cmdline="dom0_mem=2048M dom0_max_vcpus=4 dom0=pvh com1=115200,8n1 guest_loglvl=all loglvl=all"
Add to the above xen_cmdline in order to activate the serial console:
console=com1
-
-In /boot/menu.rc.local:
- try-include /boot/xen.4th
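Taken together with the removals above, a 4.12 PVH dom0 needs only the two xen_* lines. A minimal /boot/loader.conf with the serial console option already folded in (the memory and vcpu figures are the example values from the message; adjust to the machine):

    xen_kernel="/boot/xen"
    xen_cmdline="dom0_mem=2048M dom0_max_vcpus=4 dom0=pvh com1=115200,8n1 guest_loglvl=all loglvl=all console=com1"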