From 0dd10c0047b5916f56e50ea0b0c7bbadc99c42b1 Mon Sep 17 00:00:00 2001 From: Neel Natu Date: Tue, 13 May 2014 16:40:27 +0000 Subject: Don't include the guest memory segments in the bhyve(8) process core dump. This has not added a lot of value when debugging bhyve issues while greatly increasing the time and space required to store the core file. Passing the "-C" option to bhyve(8) will change the default and dump guest memory in the core dump. Requested by: grehan Reviewed by: grehan --- lib/libvmmapi/vmmapi.c | 18 +++++++++++++++--- lib/libvmmapi/vmmapi.h | 3 +++ 2 files changed, 18 insertions(+), 3 deletions(-) (limited to 'lib/libvmmapi') diff --git a/lib/libvmmapi/vmmapi.c b/lib/libvmmapi/vmmapi.c index 4a7f852c6ffc..b853ae7e273d 100644 --- a/lib/libvmmapi/vmmapi.c +++ b/lib/libvmmapi/vmmapi.c @@ -57,6 +57,7 @@ struct vmctx { int fd; uint32_t lowmem_limit; enum vm_mmap_style vms; + int memflags; size_t lowmem; char *lowmem_addr; size_t highmem; @@ -101,6 +102,7 @@ vm_open(const char *name) assert(vm != NULL); vm->fd = -1; + vm->memflags = 0; vm->lowmem_limit = 3 * GB; vm->name = (char *)(vm + 1); strcpy(vm->name, name); @@ -180,10 +182,17 @@ vm_set_lowmem_limit(struct vmctx *ctx, uint32_t limit) ctx->lowmem_limit = limit; } +void +vm_set_memflags(struct vmctx *ctx, int flags) +{ + + ctx->memflags = flags; +} + static int setup_memory_segment(struct vmctx *ctx, vm_paddr_t gpa, size_t len, char **addr) { - int error; + int error, mmap_flags; struct vm_memory_segment seg; /* @@ -195,8 +204,11 @@ setup_memory_segment(struct vmctx *ctx, vm_paddr_t gpa, size_t len, char **addr) seg.len = len; error = ioctl(ctx->fd, VM_MAP_MEMORY, &seg); if (error == 0 && addr != NULL) { - *addr = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED, - ctx->fd, gpa); + mmap_flags = MAP_SHARED; + if ((ctx->memflags & VM_MEM_F_INCORE) == 0) + mmap_flags |= MAP_NOCORE; + *addr = mmap(NULL, len, PROT_READ | PROT_WRITE, mmap_flags, + ctx->fd, gpa); } return (error); } diff --git 
a/lib/libvmmapi/vmmapi.h b/lib/libvmmapi/vmmapi.h index 2a2ca6b7f9dc..c1a4b35b3628 100644 --- a/lib/libvmmapi/vmmapi.h +++ b/lib/libvmmapi/vmmapi.h @@ -42,6 +42,8 @@ enum vm_mmap_style { VM_MMAP_SPARSE, /* mappings created on-demand */ }; +#define VM_MEM_F_INCORE 0x01 /* include guest memory in core file */ + int vm_create(const char *name); struct vmctx *vm_open(const char *name); void vm_destroy(struct vmctx *ctx); @@ -53,6 +55,7 @@ void *vm_map_gpa(struct vmctx *ctx, vm_paddr_t gaddr, size_t len); int vm_get_gpa_pmap(struct vmctx *, uint64_t gpa, uint64_t *pte, int *num); uint32_t vm_get_lowmem_limit(struct vmctx *ctx); void vm_set_lowmem_limit(struct vmctx *ctx, uint32_t limit); +void vm_set_memflags(struct vmctx *ctx, int flags); int vm_set_desc(struct vmctx *ctx, int vcpu, int reg, uint64_t base, uint32_t limit, uint32_t access); int vm_get_desc(struct vmctx *ctx, int vcpu, int reg, -- cgit v1.3 From b3e9732a763de0001eab2b331b635be53c3f32ad Mon Sep 17 00:00:00 2001 From: John Baldwin Date: Thu, 15 May 2014 14:16:55 +0000 Subject: Implement a PCI interrupt router to route PCI legacy INTx interrupts to the legacy 8259A PICs. - Implement an ICH-comptabile PCI interrupt router on the lpc device with 8 steerable pins configured via config space access to byte-wide registers at 0x60-63 and 0x68-6b. - For each configured PCI INTx interrupt, route it to both an I/O APIC pin and a PCI interrupt router pin. When a PCI INTx interrupt is asserted, ensure that both pins are asserted. - Provide an initial routing of PCI interrupt router (PIRQ) pins to 8259A pins (ISA IRQs) and initialize the interrupt line config register for the corresponding PCI function with the ISA IRQ as this matches existing hardware. - Add a global _PIC method for OSPM to select the desired interrupt routing configuration. - Update the _PRT methods for PCI bridges to provide both APIC and legacy PRT tables and return the appropriate table based on the configured routing configuration. 
Note that if the lpc device is not configured, no routing information is provided. - When the lpc device is enabled, provide ACPI PCI link devices corresponding to each PIRQ pin. - Add a VMM ioctl to adjust the trigger mode (edge vs level) for 8259A pins via the ELCR. - Mark the power management SCI as level triggered. - Don't hardcode the number of elements in Packages in the source for the DSDT. iasl(8) will fill in the actual number of elements, and this makes it simpler to generate a Package with a variable number of elements. Reviewed by: tycho --- lib/libvmmapi/vmmapi.c | 14 ++ lib/libvmmapi/vmmapi.h | 2 + sys/amd64/include/vmm.h | 5 + sys/amd64/include/vmm_dev.h | 8 + sys/amd64/vmm/io/vatpic.c | 37 +++++ sys/amd64/vmm/io/vatpic.h | 1 + sys/amd64/vmm/vmm_dev.c | 6 + usr.sbin/bhyve/Makefile | 1 + usr.sbin/bhyve/acpi.c | 2 +- usr.sbin/bhyve/acpi.h | 1 + usr.sbin/bhyve/bhyverun.c | 3 + usr.sbin/bhyve/mptbl.c | 3 +- usr.sbin/bhyve/pci_emul.c | 177 +++++++++++++++------- usr.sbin/bhyve/pci_emul.h | 6 +- usr.sbin/bhyve/pci_irq.c | 349 ++++++++++++++++++++++++++++++++++++++++++++ usr.sbin/bhyve/pci_irq.h | 45 ++++++ usr.sbin/bhyve/pci_lpc.c | 68 ++++++++- usr.sbin/bhyve/pci_lpc.h | 2 + usr.sbin/bhyve/pm.c | 13 ++ 19 files changed, 682 insertions(+), 61 deletions(-) create mode 100644 usr.sbin/bhyve/pci_irq.c create mode 100644 usr.sbin/bhyve/pci_irq.h (limited to 'lib/libvmmapi') diff --git a/lib/libvmmapi/vmmapi.c b/lib/libvmmapi/vmmapi.c index b853ae7e273d..5e630f87d248 100644 --- a/lib/libvmmapi/vmmapi.c +++ b/lib/libvmmapi/vmmapi.c @@ -507,6 +507,7 @@ int vm_isa_pulse_irq(struct vmctx *ctx, int atpic_irq, int ioapic_irq) { struct vm_isa_irq isa_irq; + bzero(&isa_irq, sizeof(struct vm_isa_irq)); isa_irq.atpic_irq = atpic_irq; isa_irq.ioapic_irq = ioapic_irq; @@ -514,6 +515,19 @@ vm_isa_pulse_irq(struct vmctx *ctx, int atpic_irq, int ioapic_irq) return (ioctl(ctx->fd, VM_ISA_PULSE_IRQ, &isa_irq)); } +int +vm_isa_set_irq_trigger(struct vmctx *ctx, int atpic_irq, + 
enum vm_intr_trigger trigger) +{ + struct vm_isa_irq_trigger isa_irq_trigger; + + bzero(&isa_irq_trigger, sizeof(struct vm_isa_irq_trigger)); + isa_irq_trigger.atpic_irq = atpic_irq; + isa_irq_trigger.trigger = trigger; + + return (ioctl(ctx->fd, VM_ISA_SET_IRQ_TRIGGER, &isa_irq_trigger)); +} + int vm_inject_nmi(struct vmctx *ctx, int vcpu) { diff --git a/lib/libvmmapi/vmmapi.h b/lib/libvmmapi/vmmapi.h index c1a4b35b3628..88e99475cd97 100644 --- a/lib/libvmmapi/vmmapi.h +++ b/lib/libvmmapi/vmmapi.h @@ -78,6 +78,8 @@ int vm_ioapic_pincount(struct vmctx *ctx, int *pincount); int vm_isa_assert_irq(struct vmctx *ctx, int atpic_irq, int ioapic_irq); int vm_isa_deassert_irq(struct vmctx *ctx, int atpic_irq, int ioapic_irq); int vm_isa_pulse_irq(struct vmctx *ctx, int atpic_irq, int ioapic_irq); +int vm_isa_set_irq_trigger(struct vmctx *ctx, int atpic_irq, + enum vm_intr_trigger trigger); int vm_inject_nmi(struct vmctx *ctx, int vcpu); int vm_capability_name2type(const char *capname); const char *vm_capability_type2name(int type); diff --git a/sys/amd64/include/vmm.h b/sys/amd64/include/vmm.h index 68240b9d1317..50d879b1bb8b 100644 --- a/sys/amd64/include/vmm.h +++ b/sys/amd64/include/vmm.h @@ -301,6 +301,11 @@ enum x2apic_state { X2APIC_STATE_LAST }; +enum vm_intr_trigger { + EDGE_TRIGGER, + LEVEL_TRIGGER +}; + /* * The 'access' field has the format specified in Table 21-2 of the Intel * Architecture Manual vol 3b. 
diff --git a/sys/amd64/include/vmm_dev.h b/sys/amd64/include/vmm_dev.h index fcd437f0198f..ecafa9ca5e31 100644 --- a/sys/amd64/include/vmm_dev.h +++ b/sys/amd64/include/vmm_dev.h @@ -84,6 +84,11 @@ struct vm_isa_irq { int ioapic_irq; }; +struct vm_isa_irq_trigger { + int atpic_irq; + enum vm_intr_trigger trigger; +}; + struct vm_capability { int cpuid; enum vm_cap_type captype; @@ -213,6 +218,7 @@ enum { IOCNUM_ISA_ASSERT_IRQ = 80, IOCNUM_ISA_DEASSERT_IRQ = 81, IOCNUM_ISA_PULSE_IRQ = 82, + IOCNUM_ISA_SET_IRQ_TRIGGER = 83, }; #define VM_RUN \ @@ -253,6 +259,8 @@ enum { _IOW('v', IOCNUM_ISA_DEASSERT_IRQ, struct vm_isa_irq) #define VM_ISA_PULSE_IRQ \ _IOW('v', IOCNUM_ISA_PULSE_IRQ, struct vm_isa_irq) +#define VM_ISA_SET_IRQ_TRIGGER \ + _IOW('v', IOCNUM_ISA_SET_IRQ_TRIGGER, struct vm_isa_irq_trigger) #define VM_SET_CAPABILITY \ _IOW('v', IOCNUM_SET_CAPABILITY, struct vm_capability) #define VM_GET_CAPABILITY \ diff --git a/sys/amd64/vmm/io/vatpic.c b/sys/amd64/vmm/io/vatpic.c index 66905e70bceb..298560c98e29 100644 --- a/sys/amd64/vmm/io/vatpic.c +++ b/sys/amd64/vmm/io/vatpic.c @@ -446,6 +446,43 @@ vatpic_pulse_irq(struct vm *vm, int irq) return (vatpic_set_irqstate(vm, irq, IRQSTATE_PULSE)); } +int +vatpic_set_irq_trigger(struct vm *vm, int irq, enum vm_intr_trigger trigger) +{ + struct vatpic *vatpic; + + if (irq < 0 || irq > 15) + return (EINVAL); + + /* + * See comment in vatpic_elc_handler. These IRQs must be + * edge triggered. 
+ */ + if (trigger == LEVEL_TRIGGER) { + switch (irq) { + case 0: + case 1: + case 2: + case 8: + case 13: + return (EINVAL); + } + } + + vatpic = vm_atpic(vm); + + VATPIC_LOCK(vatpic); + + if (trigger == LEVEL_TRIGGER) + vatpic->elc[irq >> 3] |= 1 << (irq & 0x7); + else + vatpic->elc[irq >> 3] &= ~(1 << (irq & 0x7)); + + VATPIC_UNLOCK(vatpic); + + return (0); +} + void vatpic_pending_intr(struct vm *vm, int *vecptr) { diff --git a/sys/amd64/vmm/io/vatpic.h b/sys/amd64/vmm/io/vatpic.h index d4d6b26cfa52..84d5651dd4cb 100644 --- a/sys/amd64/vmm/io/vatpic.h +++ b/sys/amd64/vmm/io/vatpic.h @@ -49,6 +49,7 @@ int vatpic_elc_handler(void *vm, int vcpuid, bool in, int port, int bytes, int vatpic_assert_irq(struct vm *vm, int irq); int vatpic_deassert_irq(struct vm *vm, int irq); int vatpic_pulse_irq(struct vm *vm, int irq); +int vatpic_set_irq_trigger(struct vm *vm, int irq, enum vm_intr_trigger trigger); void vatpic_pending_intr(struct vm *vm, int *vecptr); void vatpic_intr_accepted(struct vm *vm, int vector); diff --git a/sys/amd64/vmm/vmm_dev.c b/sys/amd64/vmm/vmm_dev.c index 3112c52c97a8..f1d57955767d 100644 --- a/sys/amd64/vmm/vmm_dev.c +++ b/sys/amd64/vmm/vmm_dev.c @@ -156,6 +156,7 @@ vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag, struct vm_lapic_msi *vmmsi; struct vm_ioapic_irq *ioapic_irq; struct vm_isa_irq *isa_irq; + struct vm_isa_irq_trigger *isa_irq_trigger; struct vm_capability *vmcap; struct vm_pptdev *pptdev; struct vm_pptdev_mmio *pptmmio; @@ -346,6 +347,11 @@ vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag, if (error == 0 && isa_irq->ioapic_irq != -1) error = vioapic_pulse_irq(sc->vm, isa_irq->ioapic_irq); break; + case VM_ISA_SET_IRQ_TRIGGER: + isa_irq_trigger = (struct vm_isa_irq_trigger *)data; + error = vatpic_set_irq_trigger(sc->vm, + isa_irq_trigger->atpic_irq, isa_irq_trigger->trigger); + break; case VM_MAP_MEMORY: seg = (struct vm_memory_segment *)data; error = vm_malloc(sc->vm, seg->gpa, seg->len); diff 
--git a/usr.sbin/bhyve/Makefile b/usr.sbin/bhyve/Makefile index c73cb3d31e02..23e16cb7d0d3 100644 --- a/usr.sbin/bhyve/Makefile +++ b/usr.sbin/bhyve/Makefile @@ -23,6 +23,7 @@ SRCS= \ pci_ahci.c \ pci_emul.c \ pci_hostbridge.c \ + pci_irq.c \ pci_lpc.c \ pci_passthru.c \ pci_virtio_block.c \ diff --git a/usr.sbin/bhyve/acpi.c b/usr.sbin/bhyve/acpi.c index db7f0eb72c48..c4ec020bd5e1 100644 --- a/usr.sbin/bhyve/acpi.c +++ b/usr.sbin/bhyve/acpi.c @@ -704,7 +704,7 @@ basl_fwrite_dsdt(FILE *fp) dsdt_line("DefinitionBlock (\"bhyve_dsdt.aml\", \"DSDT\", 2," "\"BHYVE \", \"BVDSDT \", 0x00000001)"); dsdt_line("{"); - dsdt_line(" Name (_S5, Package (0x02)"); + dsdt_line(" Name (_S5, Package ()"); dsdt_line(" {"); dsdt_line(" 0x05,"); dsdt_line(" Zero,"); diff --git a/usr.sbin/bhyve/acpi.h b/usr.sbin/bhyve/acpi.h index 57edc48cdc15..652164af351c 100644 --- a/usr.sbin/bhyve/acpi.h +++ b/usr.sbin/bhyve/acpi.h @@ -49,5 +49,6 @@ void dsdt_fixed_irq(uint8_t irq); void dsdt_fixed_mem32(uint32_t base, uint32_t length); void dsdt_indent(int levels); void dsdt_unindent(int levels); +void sci_init(struct vmctx *ctx); #endif /* _ACPI_H_ */ diff --git a/usr.sbin/bhyve/bhyverun.c b/usr.sbin/bhyve/bhyverun.c index b2354c9e22a6..d9b4418e99f0 100644 --- a/usr.sbin/bhyve/bhyverun.c +++ b/usr.sbin/bhyve/bhyverun.c @@ -60,6 +60,7 @@ __FBSDID("$FreeBSD$"); #include "mevent.h" #include "mptbl.h" #include "pci_emul.h" +#include "pci_irq.h" #include "pci_lpc.h" #include "smbiostbl.h" #include "xmsr.h" @@ -770,9 +771,11 @@ main(int argc, char *argv[]) init_mem(); init_inout(); + pci_irq_init(ctx); ioapic_init(ctx); rtc_init(ctx); + sci_init(ctx); /* * Exit if a device emulation finds an error in it's initilization diff --git a/usr.sbin/bhyve/mptbl.c b/usr.sbin/bhyve/mptbl.c index 4c2167e98c85..904d103a51c5 100644 --- a/usr.sbin/bhyve/mptbl.c +++ b/usr.sbin/bhyve/mptbl.c @@ -210,7 +210,8 @@ mpt_count_ioint_entries(void) } static void -mpt_generate_pci_int(int bus, int slot, int pin, int ioapic_irq, 
void *arg) +mpt_generate_pci_int(int bus, int slot, int pin, int pirq_pin, int ioapic_irq, + void *arg) { int_entry_ptr *mpiep, mpie; diff --git a/usr.sbin/bhyve/pci_emul.c b/usr.sbin/bhyve/pci_emul.c index 5b87da7f0604..e7f4894e1679 100644 --- a/usr.sbin/bhyve/pci_emul.c +++ b/usr.sbin/bhyve/pci_emul.c @@ -51,6 +51,7 @@ __FBSDID("$FreeBSD$"); #include "ioapic.h" #include "mem.h" #include "pci_emul.h" +#include "pci_irq.h" #include "pci_lpc.h" #define CONF1_ADDR_PORT 0x0cf8 @@ -81,6 +82,7 @@ struct funcinfo { struct intxinfo { int ii_count; + int ii_pirq_pin; int ii_ioapic_irq; }; @@ -113,6 +115,7 @@ static uint64_t pci_emul_membase64; #define PCI_EMUL_MEMLIMIT64 0xFD00000000UL static struct pci_devemu *pci_emul_finddev(char *name); +static void pci_lintr_route(struct pci_devinst *pi); static void pci_lintr_update(struct pci_devinst *pi); static struct mem_range pci_mem_hole; @@ -697,6 +700,7 @@ pci_emul_init(struct vmctx *ctx, struct pci_devemu *pde, int bus, int slot, pthread_mutex_init(&pdi->pi_lintr.lock, NULL); pdi->pi_lintr.pin = 0; pdi->pi_lintr.state = IDLE; + pdi->pi_lintr.pirq_pin = 0; pdi->pi_lintr.ioapic_irq = 0; pdi->pi_d = pde; snprintf(pdi->pi_name, PI_NAMESZ, "%s-pci-%d", pde->pe_emu, slot); @@ -1066,6 +1070,27 @@ init_pci(struct vmctx *ctx) bi->memlimit64 = pci_emul_membase64; } + /* + * PCI backends are initialized before routing INTx interrupts + * so that LPC devices are able to reserve ISA IRQs before + * routing PIRQ pins. 
+ */ + for (bus = 0; bus < MAXBUSES; bus++) { + if ((bi = pci_businfo[bus]) == NULL) + continue; + + for (slot = 0; slot < MAXSLOTS; slot++) { + si = &bi->slotinfo[slot]; + for (func = 0; func < MAXFUNCS; func++) { + fi = &si->si_funcs[func]; + if (fi->fi_devi == NULL) + continue; + pci_lintr_route(fi->fi_devi); + } + } + } + lpc_pirq_routed(); + /* * The guest physical memory map looks like the following: * [0, lowmem) guest system memory @@ -1093,19 +1118,36 @@ init_pci(struct vmctx *ctx) } static void -pci_prt_entry(int bus, int slot, int pin, int ioapic_irq, void *arg) +pci_apic_prt_entry(int bus, int slot, int pin, int pirq_pin, int ioapic_irq, + void *arg) { - int *count; - count = arg; - dsdt_line(" Package (0x04)"); + dsdt_line(" Package ()"); dsdt_line(" {"); dsdt_line(" 0x%X,", slot << 16 | 0xffff); dsdt_line(" 0x%02X,", pin - 1); dsdt_line(" Zero,"); dsdt_line(" 0x%X", ioapic_irq); - dsdt_line(" }%s", *count == 1 ? "" : ","); - (*count)--; + dsdt_line(" },"); +} + +static void +pci_pirq_prt_entry(int bus, int slot, int pin, int pirq_pin, int ioapic_irq, + void *arg) +{ + char *name; + + name = lpc_pirq_name(pirq_pin); + if (name == NULL) + return; + dsdt_line(" Package ()"); + dsdt_line(" {"); + dsdt_line(" 0x%X,", slot << 16 | 0xffff); + dsdt_line(" 0x%02X,", pin - 1); + dsdt_line(" %s,", name); + dsdt_line(" 0x00"); + dsdt_line(" },"); + free(name); } /* @@ -1118,7 +1160,7 @@ pci_bus_write_dsdt(int bus) struct businfo *bi; struct slotinfo *si; struct pci_devinst *pi; - int count, slot, func; + int count, func, slot; /* * If there are no devices on this 'bus' then just return. 
@@ -1133,9 +1175,6 @@ pci_bus_write_dsdt(int bus) return; } - dsdt_indent(1); - dsdt_line("Scope (_SB)"); - dsdt_line("{"); dsdt_line(" Device (PC%02X)", bus); dsdt_line(" {"); dsdt_line(" Name (_HID, EisaId (\"PNP0A03\"))"); @@ -1228,10 +1267,25 @@ pci_bus_write_dsdt(int bus) count = pci_count_lintr(bus); if (count != 0) { dsdt_indent(2); - dsdt_line("Name (_PRT, Package (0x%02X)", count); + dsdt_line("Name (PPRT, Package ()"); dsdt_line("{"); - pci_walk_lintr(bus, pci_prt_entry, &count); - dsdt_line("})"); + pci_walk_lintr(bus, pci_pirq_prt_entry, NULL); + dsdt_line("})"); + dsdt_line("Name (APRT, Package ()"); + dsdt_line("{"); + pci_walk_lintr(bus, pci_apic_prt_entry, NULL); + dsdt_line("})"); + dsdt_line("Method (_PRT, 0, NotSerialized)"); + dsdt_line("{"); + dsdt_line(" If (PICM)"); + dsdt_line(" {"); + dsdt_line(" Return (APRT)"); + dsdt_line(" }"); + dsdt_line(" Else"); + dsdt_line(" {"); + dsdt_line(" Return (PPRT)"); + dsdt_line(" }"); + dsdt_line("}"); dsdt_unindent(2); } @@ -1247,8 +1301,6 @@ pci_bus_write_dsdt(int bus) dsdt_unindent(2); done: dsdt_line(" }"); - dsdt_line("}"); - dsdt_unindent(1); } void @@ -1256,8 +1308,19 @@ pci_write_dsdt(void) { int bus; + dsdt_indent(1); + dsdt_line("Name (PICM, 0x00)"); + dsdt_line("Method (_PIC, 1, NotSerialized)"); + dsdt_line("{"); + dsdt_line(" Store (Arg0, PICM)"); + dsdt_line("}"); + dsdt_line(""); + dsdt_line("Scope (_SB)"); + dsdt_line("{"); for (bus = 0; bus < MAXBUSES; bus++) pci_bus_write_dsdt(bus); + dsdt_line("}"); + dsdt_unindent(1); } int @@ -1330,18 +1393,19 @@ pci_lintr_permitted(struct pci_devinst *pi) (cmd & PCIM_CMD_INTxDIS))); } -int +void pci_lintr_request(struct pci_devinst *pi) { struct businfo *bi; struct slotinfo *si; - int bestpin, bestcount, irq, pin; + int bestpin, bestcount, pin; bi = pci_businfo[pi->pi_bus]; assert(bi != NULL); /* - * First, allocate a pin from our slot. + * Just allocate a pin from our slot. The pin will be + * assigned IRQs later when interrupts are routed. 
*/ si = &bi->slotinfo[pi->pi_slot]; bestpin = 0; @@ -1353,26 +1417,43 @@ pci_lintr_request(struct pci_devinst *pi) } } - /* - * Attempt to allocate an I/O APIC pin for this intpin. If - * 8259A support is added we will need a separate field to - * assign the intpin to an input pin on the PCI interrupt - * router. - */ - if (si->si_intpins[bestpin].ii_count == 0) { - irq = ioapic_pci_alloc_irq(); - if (irq < 0) - return (-1); - si->si_intpins[bestpin].ii_ioapic_irq = irq; - } else - irq = si->si_intpins[bestpin].ii_ioapic_irq; si->si_intpins[bestpin].ii_count++; - pi->pi_lintr.pin = bestpin + 1; - pi->pi_lintr.ioapic_irq = irq; - pci_set_cfgdata8(pi, PCIR_INTLINE, irq); pci_set_cfgdata8(pi, PCIR_INTPIN, bestpin + 1); - return (0); +} + +static void +pci_lintr_route(struct pci_devinst *pi) +{ + struct businfo *bi; + struct intxinfo *ii; + + if (pi->pi_lintr.pin == 0) + return; + + bi = pci_businfo[pi->pi_bus]; + assert(bi != NULL); + ii = &bi->slotinfo[pi->pi_slot].si_intpins[pi->pi_lintr.pin - 1]; + + /* + * Attempt to allocate an I/O APIC pin for this intpin if one + * is not yet assigned. + */ + if (ii->ii_ioapic_irq == 0) + ii->ii_ioapic_irq = ioapic_pci_alloc_irq(); + assert(ii->ii_ioapic_irq > 0); + + /* + * Attempt to allocate a PIRQ pin for this intpin if one is + * not yet assigned. 
+ */ + if (ii->ii_pirq_pin == 0) + ii->ii_pirq_pin = pirq_alloc_pin(pi->pi_vmctx); + assert(ii->ii_pirq_pin > 0); + + pi->pi_lintr.ioapic_irq = ii->ii_ioapic_irq; + pi->pi_lintr.pirq_pin = ii->ii_pirq_pin; + pci_set_cfgdata8(pi, PCIR_INTLINE, pirq_irq(ii->ii_pirq_pin)); } void @@ -1385,8 +1466,7 @@ pci_lintr_assert(struct pci_devinst *pi) if (pi->pi_lintr.state == IDLE) { if (pci_lintr_permitted(pi)) { pi->pi_lintr.state = ASSERTED; - vm_ioapic_assert_irq(pi->pi_vmctx, - pi->pi_lintr.ioapic_irq); + pci_irq_assert(pi); } else pi->pi_lintr.state = PENDING; } @@ -1402,7 +1482,7 @@ pci_lintr_deassert(struct pci_devinst *pi) pthread_mutex_lock(&pi->pi_lintr.lock); if (pi->pi_lintr.state == ASSERTED) { pi->pi_lintr.state = IDLE; - vm_ioapic_deassert_irq(pi->pi_vmctx, pi->pi_lintr.ioapic_irq); + pci_irq_deassert(pi); } else if (pi->pi_lintr.state == PENDING) pi->pi_lintr.state = IDLE; pthread_mutex_unlock(&pi->pi_lintr.lock); @@ -1414,11 +1494,11 @@ pci_lintr_update(struct pci_devinst *pi) pthread_mutex_lock(&pi->pi_lintr.lock); if (pi->pi_lintr.state == ASSERTED && !pci_lintr_permitted(pi)) { - vm_ioapic_deassert_irq(pi->pi_vmctx, pi->pi_lintr.ioapic_irq); + pci_irq_deassert(pi); pi->pi_lintr.state = PENDING; } else if (pi->pi_lintr.state == PENDING && pci_lintr_permitted(pi)) { pi->pi_lintr.state = ASSERTED; - vm_ioapic_assert_irq(pi->pi_vmctx, pi->pi_lintr.ioapic_irq); + pci_irq_assert(pi); } pthread_mutex_unlock(&pi->pi_lintr.lock); } @@ -1458,7 +1538,8 @@ pci_walk_lintr(int bus, pci_lintr_cb cb, void *arg) for (pin = 0; pin < 4; pin++) { ii = &si->si_intpins[pin]; if (ii->ii_count != 0) - cb(bus, slot, pin + 1, ii->ii_ioapic_irq, arg); + cb(bus, slot, pin + 1, ii->ii_pirq_pin, + ii->ii_ioapic_irq, arg); } } } @@ -1755,20 +1836,6 @@ INOUT_PORT(pci_cfgdata, CONF1_DATA_PORT+1, IOPORT_F_INOUT, pci_emul_cfgdata); INOUT_PORT(pci_cfgdata, CONF1_DATA_PORT+2, IOPORT_F_INOUT, pci_emul_cfgdata); INOUT_PORT(pci_cfgdata, CONF1_DATA_PORT+3, IOPORT_F_INOUT, pci_emul_cfgdata); -/* - 
* I/O ports to configure PCI IRQ routing. We ignore all writes to it. - */ -static int -pci_irq_port_handler(struct vmctx *ctx, int vcpu, int in, int port, int bytes, - uint32_t *eax, void *arg) -{ - assert(in == 0); - return (0); -} -INOUT_PORT(pci_irq, 0xC00, IOPORT_F_OUT, pci_irq_port_handler); -INOUT_PORT(pci_irq, 0xC01, IOPORT_F_OUT, pci_irq_port_handler); -SYSRES_IO(0xC00, 2); - #define PCI_EMUL_TEST #ifdef PCI_EMUL_TEST /* diff --git a/usr.sbin/bhyve/pci_emul.h b/usr.sbin/bhyve/pci_emul.h index e1040a8b715a..866ffc5b8224 100644 --- a/usr.sbin/bhyve/pci_emul.h +++ b/usr.sbin/bhyve/pci_emul.h @@ -120,6 +120,7 @@ struct pci_devinst { struct { int8_t pin; enum lintr_stat state; + int pirq_pin; int ioapic_irq; pthread_mutex_t lock; } pi_lintr; @@ -200,7 +201,8 @@ struct pciecap { uint16_t slot_status2; } __packed; -typedef void (*pci_lintr_cb)(int b, int s, int pin, int ioapic_irq, void *arg); +typedef void (*pci_lintr_cb)(int b, int s, int pin, int pirq_pin, + int ioapic_irq, void *arg); int init_pci(struct vmctx *ctx); void msicap_cfgwrite(struct pci_devinst *pi, int capoff, int offset, @@ -218,7 +220,7 @@ void pci_generate_msi(struct pci_devinst *pi, int msgnum); void pci_generate_msix(struct pci_devinst *pi, int msgnum); void pci_lintr_assert(struct pci_devinst *pi); void pci_lintr_deassert(struct pci_devinst *pi); -int pci_lintr_request(struct pci_devinst *pi); +void pci_lintr_request(struct pci_devinst *pi); int pci_msi_enabled(struct pci_devinst *pi); int pci_msix_enabled(struct pci_devinst *pi); int pci_msix_table_bar(struct pci_devinst *pi); diff --git a/usr.sbin/bhyve/pci_irq.c b/usr.sbin/bhyve/pci_irq.c new file mode 100644 index 000000000000..653aeb0ff1f6 --- /dev/null +++ b/usr.sbin/bhyve/pci_irq.c @@ -0,0 +1,349 @@ +/*- + * Copyright (c) 2014 Advanced Computing Technologies LLC + * Written by: John H. Baldwin + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + + +#include +__FBSDID("$FreeBSD$"); + +#include +#include + +#include +#include +#include +#include +#include +#include + +#include "acpi.h" +#include "inout.h" +#include "pci_emul.h" +#include "pci_irq.h" +#include "pci_lpc.h" + +/* + * Implement an 8 pin PCI interrupt router compatible with the router + * present on Intel's ICH10 chip. + */ + +/* Fields in each PIRQ register. */ +#define PIRQ_DIS 0x80 +#define PIRQ_IRQ 0x0f + +/* Only IRQs 3-7, 9-12, and 14-15 are permitted. */ +#define PERMITTED_IRQS 0xdef8 +#define IRQ_PERMITTED(irq) (((1U << (irq)) & PERMITTED_IRQS) != 0) + +/* IRQ count to disable an IRQ. 
*/ +#define IRQ_DISABLED 0xff + +static struct pirq { + uint8_t reg; + int use_count; + int active_count; + pthread_mutex_t lock; +} pirqs[8]; + +static u_char irq_counts[16]; +static int pirq_cold = 1; + +/* + * Returns true if this pin is enabled with a valid IRQ. Setting the + * register to a reserved IRQ causes interrupts to not be asserted as + * if the pin was disabled. + */ +static bool +pirq_valid_irq(int reg) +{ + + if (reg & PIRQ_DIS) + return (false); + return (IRQ_PERMITTED(reg & PIRQ_IRQ)); +} + +uint8_t +pirq_read(int pin) +{ + + assert(pin > 0 && pin <= nitems(pirqs)); + return (pirqs[pin - 1].reg); +} + +void +pirq_write(struct vmctx *ctx, int pin, uint8_t val) +{ + struct pirq *pirq; + + assert(pin > 0 && pin <= nitems(pirqs)); + pirq = &pirqs[pin - 1]; + pthread_mutex_lock(&pirq->lock); + if (pirq->reg != (val & (PIRQ_DIS | PIRQ_IRQ))) { + if (pirq->active_count != 0 && pirq_valid_irq(pirq->reg)) + vm_isa_deassert_irq(ctx, pirq->reg & PIRQ_IRQ, -1); + pirq->reg = val & (PIRQ_DIS | PIRQ_IRQ); + if (pirq->active_count != 0 && pirq_valid_irq(pirq->reg)) + vm_isa_assert_irq(ctx, pirq->reg & PIRQ_IRQ, -1); + } + pthread_mutex_unlock(&pirq->lock); +} + +void +pci_irq_reserve(int irq) +{ + + assert(irq < nitems(irq_counts)); + assert(pirq_cold); + assert(irq_counts[irq] == 0 || irq_counts[irq] == IRQ_DISABLED); + irq_counts[irq] = IRQ_DISABLED; +} + +void +pci_irq_use(int irq) +{ + + assert(irq < nitems(irq_counts)); + assert(pirq_cold); + if (irq_counts[irq] != IRQ_DISABLED) + irq_counts[irq]++; +} + +void +pci_irq_init(struct vmctx *ctx) +{ + int i; + + for (i = 0; i < nitems(pirqs); i++) { + pirqs[i].reg = PIRQ_DIS; + pirqs[i].use_count = 0; + pirqs[i].active_count = 0; + pthread_mutex_init(&pirqs[i].lock, NULL); + } + for (i = 0; i < nitems(irq_counts); i++) { + if (IRQ_PERMITTED(i)) + irq_counts[i] = 0; + else + irq_counts[i] = IRQ_DISABLED; + } +} + +void +pci_irq_assert(struct pci_devinst *pi) +{ + struct pirq *pirq; + + if (pi->pi_lintr.pirq_pin 
> 0) {
+		assert(pi->pi_lintr.pirq_pin <= nitems(pirqs));
+		pirq = &pirqs[pi->pi_lintr.pirq_pin - 1];
+		pthread_mutex_lock(&pirq->lock);
+		pirq->active_count++;
+		if (pirq->active_count == 1 && pirq_valid_irq(pirq->reg)) {
+			vm_isa_assert_irq(pi->pi_vmctx, pirq->reg & PIRQ_IRQ,
+			    pi->pi_lintr.ioapic_irq);
+			pthread_mutex_unlock(&pirq->lock);
+			return;
+		}
+		pthread_mutex_unlock(&pirq->lock);
+	}
+	vm_ioapic_assert_irq(pi->pi_vmctx, pi->pi_lintr.ioapic_irq);
+}
+
+void
+pci_irq_deassert(struct pci_devinst *pi)
+{
+	struct pirq *pirq;
+
+	if (pi->pi_lintr.pirq_pin > 0) {
+		assert(pi->pi_lintr.pirq_pin <= nitems(pirqs));
+		pirq = &pirqs[pi->pi_lintr.pirq_pin - 1];
+		pthread_mutex_lock(&pirq->lock);
+		pirq->active_count--;
+		if (pirq->active_count == 0 && pirq_valid_irq(pirq->reg)) {
+			vm_isa_deassert_irq(pi->pi_vmctx, pirq->reg & PIRQ_IRQ,
+			    pi->pi_lintr.ioapic_irq);
+			pthread_mutex_unlock(&pirq->lock);
+			return;
+		}
+		pthread_mutex_unlock(&pirq->lock);
+	}
+	vm_ioapic_deassert_irq(pi->pi_vmctx, pi->pi_lintr.ioapic_irq);
+}
+
+/*
+ * Allocate a PIRQ pin for a PCI INTx interrupt and return its 1-based
+ * pin number.  The least-used pin is chosen; if that pin is still
+ * disabled it is routed to the least-used ISA IRQ that has not been
+ * marked IRQ_DISABLED, and that IRQ is switched to level-triggered
+ * mode in the 8259A via vm_isa_set_irq_trigger().
+ */
+int
+pirq_alloc_pin(struct vmctx *ctx)
+{
+	int best_count, best_irq, best_pin, irq, pin;
+
+	/*
+	 * Routing has started: clear the flag so that the
+	 * assert(pirq_cold) checks in pci_irq_reserve() and
+	 * pci_irq_use() catch any attempt to change IRQ usage after
+	 * pins have been handed out.
+	 */
+	pirq_cold = 0;
+
+	/* First, find the least-used PIRQ pin. */
+	best_pin = 0;
+	best_count = pirqs[0].use_count;
+	for (pin = 1; pin < nitems(pirqs); pin++) {
+		if (pirqs[pin].use_count < best_count) {
+			best_pin = pin;
+			best_count = pirqs[pin].use_count;
+		}
+	}
+	pirqs[best_pin].use_count++;
+
+	/* Second, route this pin to an IRQ. */
+	if (pirqs[best_pin].reg == PIRQ_DIS) {
+		best_irq = -1;
+		best_count = 0;
+		for (irq = 0; irq < nitems(irq_counts); irq++) {
+			if (irq_counts[irq] == IRQ_DISABLED)
+				continue;
+			if (best_irq == -1 || irq_counts[irq] < best_count) {
+				best_irq = irq;
+				best_count = irq_counts[irq];
+			}
+		}
+		/*
+		 * best_irq is still -1 if every IRQ is disabled; fail
+		 * here rather than indexing irq_counts[] out of bounds.
+		 */
+		assert(best_irq >= 0);
+		irq_counts[best_irq]++;
+		pirqs[best_pin].reg = best_irq;
+		vm_isa_set_irq_trigger(ctx, best_irq, LEVEL_TRIGGER);
+	}
+
+	return (best_pin + 1);
+}
+
+int
+pirq_irq(int pin)
+{
+
+	if (pin == -1)
+		return (255);
+	assert(pin > 0 && pin <= nitems(pirqs));
+	return (pirqs[pin - 1].reg & PIRQ_IRQ);
+}
+
+/* XXX: Generate $PIR table. */
+
+static void
+pirq_dsdt(void)
+{
+	char *irq_prs, *old;
+	int irq, pin;
+
+	irq_prs = NULL;
+	for (irq = 0; irq < nitems(irq_counts); irq++) {
+		if (!IRQ_PERMITTED(irq))
+			continue;
+		if (irq_prs == NULL)
+			asprintf(&irq_prs, "%d", irq);
+		else {
+			old = irq_prs;
+			asprintf(&irq_prs, "%s,%d", old, irq);
+			free(old);
+		}
+	}
+
+	/*
+	 * A helper method to validate a link register's value.  This
+	 * duplicates pirq_valid_irq().
+ */ + dsdt_line(""); + dsdt_line("Method (PIRV, 1, NotSerialized)"); + dsdt_line("{"); + dsdt_line(" If (And (Arg0, 0x%02X))", PIRQ_DIS); + dsdt_line(" {"); + dsdt_line(" Return (0x00)"); + dsdt_line(" }"); + dsdt_line(" And (Arg0, 0x%02X, Local0)", PIRQ_IRQ); + dsdt_line(" If (LLess (Local0, 0x03))"); + dsdt_line(" {"); + dsdt_line(" Return (0x00)"); + dsdt_line(" }"); + dsdt_line(" If (LEqual (Local0, 0x08))"); + dsdt_line(" {"); + dsdt_line(" Return (0x00)"); + dsdt_line(" }"); + dsdt_line(" If (LEqual (Local0, 0x0D))"); + dsdt_line(" {"); + dsdt_line(" Return (0x00)"); + dsdt_line(" }"); + dsdt_line(" Return (0x01)"); + dsdt_line("}"); + + for (pin = 0; pin < nitems(pirqs); pin++) { + dsdt_line(""); + dsdt_line("Device (LNK%c)", 'A' + pin); + dsdt_line("{"); + dsdt_line(" Name (_HID, EisaId (\"PNP0C0F\"))"); + dsdt_line(" Name (_UID, 0x%02X)", pin + 1); + dsdt_line(" Method (_STA, 0, NotSerialized)"); + dsdt_line(" {"); + dsdt_line(" If (PIRV (PIR%c))", 'A' + pin); + dsdt_line(" {"); + dsdt_line(" Return (0x0B)"); + dsdt_line(" }"); + dsdt_line(" Else"); + dsdt_line(" {"); + dsdt_line(" Return (0x09)"); + dsdt_line(" }"); + dsdt_line(" }"); + dsdt_line(" Name (_PRS, ResourceTemplate ()"); + dsdt_line(" {"); + dsdt_line(" IRQ (Level, ActiveLow, Shared, )"); + dsdt_line(" {%s}", irq_prs); + dsdt_line(" })"); + dsdt_line(" Name (CB%02X, ResourceTemplate ()", pin + 1); + dsdt_line(" {"); + dsdt_line(" IRQ (Level, ActiveLow, Shared, )"); + dsdt_line(" {}"); + dsdt_line(" })"); + dsdt_line(" CreateWordField (CB%02X, 0x01, CIR%c)", + pin + 1, 'A' + pin); + dsdt_line(" Method (_CRS, 0, NotSerialized)"); + dsdt_line(" {"); + dsdt_line(" And (PIR%c, 0x%02X, Local0)", 'A' + pin, + PIRQ_DIS | PIRQ_IRQ); + dsdt_line(" If (PIRV (Local0))"); + dsdt_line(" {"); + dsdt_line(" ShiftLeft (0x01, Local0, CIR%c)", 'A' + pin); + dsdt_line(" }"); + dsdt_line(" Else"); + dsdt_line(" {"); + dsdt_line(" Store (0x00, CIR%c)", 'A' + pin); + dsdt_line(" }"); + dsdt_line(" Return (CB%02X)", 
pin + 1); + dsdt_line(" }"); + dsdt_line(" Method (_DIS, 0, NotSerialized)"); + dsdt_line(" {"); + dsdt_line(" Store (0x80, PIR%c)", 'A' + pin); + dsdt_line(" }"); + dsdt_line(" Method (_SRS, 1, NotSerialized)"); + dsdt_line(" {"); + dsdt_line(" CreateWordField (Arg0, 0x01, SIR%c)", 'A' + pin); + dsdt_line(" FindSetRightBit (SIR%c, Local0)", 'A' + pin); + dsdt_line(" Store (Decrement (Local0), PIR%c)", 'A' + pin); + dsdt_line(" }"); + dsdt_line("}"); + } + free(irq_prs); +} +LPC_DSDT(pirq_dsdt); diff --git a/usr.sbin/bhyve/pci_irq.h b/usr.sbin/bhyve/pci_irq.h new file mode 100644 index 000000000000..9d331a5d6321 --- /dev/null +++ b/usr.sbin/bhyve/pci_irq.h @@ -0,0 +1,45 @@ +/*- + * Copyright (c) 2014 Advanced Computing Technologies LLC + * Written by: John H. Baldwin + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#ifndef __PCI_IRQ_H__ +#define __PCI_IRQ_H__ + +struct pci_devinst; + +void pci_irq_assert(struct pci_devinst *pi); +void pci_irq_deassert(struct pci_devinst *pi); +void pci_irq_init(struct vmctx *ctx); +void pci_irq_reserve(int irq); +void pci_irq_use(int irq); +int pirq_alloc_pin(struct vmctx *ctx); +int pirq_irq(int pin); +uint8_t pirq_read(int pin); +void pirq_write(struct vmctx *ctx, int pin, uint8_t val); + +#endif diff --git a/usr.sbin/bhyve/pci_lpc.c b/usr.sbin/bhyve/pci_lpc.c index f5e4a697c83a..6b61b7afd0a3 100644 --- a/usr.sbin/bhyve/pci_lpc.c +++ b/usr.sbin/bhyve/pci_lpc.c @@ -43,6 +43,7 @@ __FBSDID("$FreeBSD$"); #include "acpi.h" #include "inout.h" #include "pci_emul.h" +#include "pci_irq.h" #include "pci_lpc.h" #include "uart_emul.h" @@ -173,6 +174,7 @@ lpc_init(void) "LPC device %s\n", name); return (-1); } + pci_irq_reserve(sc->irq); sc->uart_softc = uart_init(lpc_uart_intr_assert, lpc_uart_intr_deassert, sc); @@ -208,7 +210,21 @@ pci_lpc_write_dsdt(struct pci_devinst *pi) dsdt_line("Device (ISA)"); dsdt_line("{"); dsdt_line(" Name (_ADR, 0x%04X%04X)", pi->pi_slot, pi->pi_func); - dsdt_line(" OperationRegion (P40C, PCI_Config, 0x60, 0x04)"); + dsdt_line(" OperationRegion (LPCR, PCI_Config, 0x00, 0x100)"); + dsdt_line(" Field (LPCR, AnyAcc, NoLock, Preserve)"); + dsdt_line(" {"); + dsdt_line(" Offset (0x60),"); + dsdt_line(" PIRA, 8,"); + dsdt_line(" PIRB, 8,"); + dsdt_line(" PIRC, 8,"); + dsdt_line(" 
PIRD, 8,"); + dsdt_line(" Offset (0x68),"); + dsdt_line(" PIRE, 8,"); + dsdt_line(" PIRF, 8,"); + dsdt_line(" PIRG, 8,"); + dsdt_line(" PIRH, 8"); + dsdt_line(" }"); + dsdt_line(""); dsdt_indent(1); SET_FOREACH(ldpp, lpc_dsdt_set) { @@ -305,13 +321,34 @@ pci_lpc_uart_dsdt(void) } LPC_DSDT(pci_lpc_uart_dsdt); +static int +pci_lpc_cfgwrite(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, + int coff, int bytes, uint32_t val) +{ + int pirq_pin; + + if (bytes == 1) { + pirq_pin = 0; + if (coff >= 0x60 && coff <= 0x63) + pirq_pin = coff - 0x60 + 1; + if (coff >= 0x68 && coff <= 0x6b) + pirq_pin = coff - 0x68 + 5; + if (pirq_pin != 0) { + pirq_write(ctx, pirq_pin, val); + pci_set_cfgdata8(pi, coff, pirq_read(pirq_pin)); + return (0); + } + } + return (-1); +} + static void pci_lpc_write(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx, uint64_t offset, int size, uint64_t value) { } -uint64_t +static uint64_t pci_lpc_read(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx, uint64_t offset, int size) { @@ -324,6 +361,7 @@ pci_lpc_read(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, static int pci_lpc_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts) { + /* * Do not allow more than one LPC bridge to be configured. 
*/ @@ -356,10 +394,36 @@ pci_lpc_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts) return (0); } +char * +lpc_pirq_name(int pin) +{ + char *name; + + if (lpc_bridge == NULL) + return (NULL); + asprintf(&name, "\\_SB.PC00.ISA.LNK%c,", 'A' + pin - 1); + return (name); +} + +void +lpc_pirq_routed(void) +{ + int pin; + + if (lpc_bridge == NULL) + return; + + for (pin = 0; pin < 4; pin++) + pci_set_cfgdata8(lpc_bridge, 0x60 + pin, pirq_read(pin + 1)); + for (pin = 0; pin < 4; pin++) + pci_set_cfgdata8(lpc_bridge, 0x68 + pin, pirq_read(pin + 5)); +} + struct pci_devemu pci_de_lpc = { .pe_emu = "lpc", .pe_init = pci_lpc_init, .pe_write_dsdt = pci_lpc_write_dsdt, + .pe_cfgwrite = pci_lpc_cfgwrite, .pe_barwrite = pci_lpc_write, .pe_barread = pci_lpc_read }; diff --git a/usr.sbin/bhyve/pci_lpc.h b/usr.sbin/bhyve/pci_lpc.h index e45bcb97e4d6..55a58653f422 100644 --- a/usr.sbin/bhyve/pci_lpc.h +++ b/usr.sbin/bhyve/pci_lpc.h @@ -66,5 +66,7 @@ struct lpc_sysres { #define SYSRES_MEM(base, length) LPC_SYSRES(LPC_SYSRES_MEM, base, length) int lpc_device_parse(const char *opt); +char *lpc_pirq_name(int pin); +void lpc_pirq_routed(void); #endif diff --git a/usr.sbin/bhyve/pm.c b/usr.sbin/bhyve/pm.c index 99087e44d8c6..67126d8765c7 100644 --- a/usr.sbin/bhyve/pm.c +++ b/usr.sbin/bhyve/pm.c @@ -39,6 +39,7 @@ __FBSDID("$FreeBSD$"); #include "acpi.h" #include "inout.h" #include "mevent.h" +#include "pci_irq.h" #include "pci_lpc.h" static pthread_mutex_t pm_lock = PTHREAD_MUTEX_INITIALIZER; @@ -289,3 +290,15 @@ smi_cmd_handler(struct vmctx *ctx, int vcpu, int in, int port, int bytes, } INOUT_PORT(smi_cmd, SMI_CMD, IOPORT_F_OUT, smi_cmd_handler); SYSRES_IO(SMI_CMD, 1); + +void +sci_init(struct vmctx *ctx) +{ + + /* + * Mark ACPI's SCI as level trigger and bump its use count + * in the PIRQ router. 
+ */ + pci_irq_use(SCI_INT); + vm_isa_set_irq_trigger(ctx, SCI_INT, LEVEL_TRIGGER); +} -- cgit v1.3 From da11f4aa1da2ed00735a3ec0361d46dd3f8a8b4f Mon Sep 17 00:00:00 2001 From: Neel Natu Date: Sat, 24 May 2014 23:12:30 +0000 Subject: Add libvmmapi functions vm_copyin() and vm_copyout() to copy into and out of the guest linear address space. These APIs in turn use a new ioctl 'VM_GLA2GPA' to convert the guest linear address to guest physical. Use the new copyin/copyout APIs when emulating ins/outs instruction in bhyve(8). --- lib/libvmmapi/vmmapi.c | 86 +++++++++++++++++++++++++++++++++++++++++++++ lib/libvmmapi/vmmapi.h | 5 +++ sys/amd64/include/vmm.h | 1 - sys/amd64/include/vmm_dev.h | 12 +++++++ sys/amd64/vmm/vmm_dev.c | 24 +++++++++++++ sys/amd64/vmm/vmm_ioport.c | 16 ++------- usr.sbin/bhyve/inout.c | 47 +++++++++++++------------ 7 files changed, 154 insertions(+), 37 deletions(-) (limited to 'lib/libvmmapi') diff --git a/lib/libvmmapi/vmmapi.c b/lib/libvmmapi/vmmapi.c index 5e630f87d248..45fffcf4827a 100644 --- a/lib/libvmmapi/vmmapi.c +++ b/lib/libvmmapi/vmmapi.c @@ -35,6 +35,7 @@ __FBSDID("$FreeBSD$"); #include #include +#include #include #include @@ -937,3 +938,88 @@ vm_get_hpet_capabilities(struct vmctx *ctx, uint32_t *capabilities) *capabilities = cap.capabilities; return (error); } + +static int +vm_gla2gpa(struct vmctx *ctx, int vcpu, struct vm_guest_paging *paging, + uint64_t gla, int prot, int *fault, uint64_t *gpa) +{ + struct vm_gla2gpa gg; + int error; + + bzero(&gg, sizeof(struct vm_gla2gpa)); + gg.vcpuid = vcpu; + gg.prot = prot; + gg.gla = gla; + gg.paging = *paging; + + error = ioctl(ctx->fd, VM_GLA2GPA, &gg); + if (error == 0) { + *fault = gg.fault; + *gpa = gg.gpa; + } + return (error); +} + +#ifndef min +#define min(a,b) (((a) < (b)) ? 
(a) : (b)) +#endif + +int +vm_copyin(struct vmctx *ctx, int vcpu, struct vm_guest_paging *paging, + uint64_t gla, void *vp, size_t len) +{ + char *dst; + const char *src; + uint64_t gpa; + int error, fault, n, off; + + dst = vp; + while (len) { + error = vm_gla2gpa(ctx, vcpu, paging, gla, PROT_READ, + &fault, &gpa); + if (error) + return (-1); + if (fault) + return (1); + + off = gpa & PAGE_MASK; + n = min(len, PAGE_SIZE - off); + src = vm_map_gpa(ctx, gpa, n); + bcopy(src, dst, n); + + gla += n; + dst += n; + len -= n; + } + return (0); +} + +int +vm_copyout(struct vmctx *ctx, int vcpu, struct vm_guest_paging *paging, + const void *vp, uint64_t gla, size_t len) +{ + uint64_t gpa; + char *dst; + const char *src; + int error, fault, n, off; + + src = vp; + while (len) { + error = vm_gla2gpa(ctx, vcpu, paging, gla, PROT_WRITE, + &fault, &gpa); + if (error) + return (-1); + if (fault) + return (1); + + off = gpa & PAGE_MASK; + n = min(len, PAGE_SIZE - off); + dst = vm_map_gpa(ctx, gpa, n); + bcopy(src, dst, n); + + gla += n; + src += n; + len -= n; + } + return (0); +} diff --git a/lib/libvmmapi/vmmapi.h b/lib/libvmmapi/vmmapi.h index 88e99475cd97..cad41c89ba0e 100644 --- a/lib/libvmmapi/vmmapi.h +++ b/lib/libvmmapi/vmmapi.h @@ -109,6 +109,11 @@ int vm_set_x2apic_state(struct vmctx *ctx, int vcpu, enum x2apic_state s); int vm_get_hpet_capabilities(struct vmctx *ctx, uint32_t *capabilities); +int vm_copyin(struct vmctx *ctx, int vcpu, struct vm_guest_paging *paging, + uint64_t gla_src, void *dst, size_t len); +int vm_copyout(struct vmctx *ctx, int vcpu, struct vm_guest_paging *paging, + const void *src, uint64_t gla_dst, size_t len); + /* Reset vcpu register state */ int vcpu_reset(struct vmctx *ctx, int vcpu); diff --git a/sys/amd64/include/vmm.h b/sys/amd64/include/vmm.h index 021efafb1ae0..28e2808e89b7 100644 --- a/sys/amd64/include/vmm.h +++ b/sys/amd64/include/vmm.h @@ -427,7 +427,6 @@ struct vm_inout_str { enum vm_reg_name seg_name; struct seg_desc seg_desc; 
uint64_t gla; /* may be set to VIE_INVALID_GLA */ - uint64_t gpa; }; struct vm_exit { diff --git a/sys/amd64/include/vmm_dev.h b/sys/amd64/include/vmm_dev.h index ecafa9ca5e31..f094d519a413 100644 --- a/sys/amd64/include/vmm_dev.h +++ b/sys/amd64/include/vmm_dev.h @@ -168,6 +168,15 @@ struct vm_suspend { enum vm_suspend_how how; }; +struct vm_gla2gpa { + int vcpuid; /* inputs */ + int prot; /* PROT_READ or PROT_WRITE */ + uint64_t gla; + struct vm_guest_paging paging; + int fault; /* outputs */ + uint64_t gpa; +}; + enum { /* general routines */ IOCNUM_ABIVERS = 0, @@ -180,6 +189,7 @@ enum { IOCNUM_MAP_MEMORY = 10, IOCNUM_GET_MEMORY_SEG = 11, IOCNUM_GET_GPA_PMAP = 12, + IOCNUM_GLA2GPA = 13, /* register/state accessors */ IOCNUM_SET_REGISTER = 20, @@ -289,4 +299,6 @@ enum { _IOR('v', IOCNUM_GET_HPET_CAPABILITIES, struct vm_hpet_cap) #define VM_GET_GPA_PMAP \ _IOWR('v', IOCNUM_GET_GPA_PMAP, struct vm_gpa_pte) +#define VM_GLA2GPA \ + _IOWR('v', IOCNUM_GLA2GPA, struct vm_gla2gpa) #endif diff --git a/sys/amd64/vmm/vmm_dev.c b/sys/amd64/vmm/vmm_dev.c index f1d57955767d..05617853641c 100644 --- a/sys/amd64/vmm/vmm_dev.c +++ b/sys/amd64/vmm/vmm_dev.c @@ -48,6 +48,7 @@ __FBSDID("$FreeBSD$"); #include #include +#include #include #include "vmm_lapic.h" @@ -168,6 +169,7 @@ vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag, struct vm_x2apic *x2apic; struct vm_gpa_pte *gpapte; struct vm_suspend *vmsuspend; + struct vm_gla2gpa *gg; sc = vmmdev_lookup2(cdev); if (sc == NULL) @@ -192,6 +194,7 @@ vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag, case VM_PPTDEV_MSI: case VM_PPTDEV_MSIX: case VM_SET_X2APIC_STATE: + case VM_GLA2GPA: /* * XXX fragile, handle with care * Assumes that the first field of the ioctl data is the vcpu. 
@@ -415,6 +418,27 @@ vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag, case VM_GET_HPET_CAPABILITIES: error = vhpet_getcap((struct vm_hpet_cap *)data); break; + case VM_GLA2GPA: { + CTASSERT(PROT_READ == VM_PROT_READ); + CTASSERT(PROT_WRITE == VM_PROT_WRITE); + CTASSERT(PROT_EXEC == VM_PROT_EXECUTE); + gg = (struct vm_gla2gpa *)data; + error = vmm_gla2gpa(sc->vm, gg->vcpuid, &gg->paging, gg->gla, + gg->prot, &gg->gpa); + KASSERT(error == 0 || error == 1 || error == -1, + ("%s: vmm_gla2gpa unknown error %d", __func__, error)); + if (error >= 0) { + /* + * error = 0: the translation was successful + * error = 1: a fault was injected into the guest + */ + gg->fault = error; + error = 0; + } else { + error = EFAULT; + } + break; + } default: error = ENOTTY; break; diff --git a/sys/amd64/vmm/vmm_ioport.c b/sys/amd64/vmm/vmm_ioport.c index f9fda2d45402..96f2418a72b4 100644 --- a/sys/amd64/vmm/vmm_ioport.c +++ b/sys/amd64/vmm/vmm_ioport.c @@ -145,7 +145,7 @@ emulate_inout_str(struct vm *vm, int vcpuid, struct vm_exit *vmexit, bool *retu) { struct vm_inout_str *vis; uint64_t gla, index, segbase; - int error, in; + int in; vis = &vmexit->u.inout_str; in = vis->inout.in; @@ -197,18 +197,8 @@ emulate_inout_str(struct vm *vm, int vcpuid, struct vm_exit *vmexit, bool *retu) } vis->gla = gla; - error = vmm_gla2gpa(vm, vcpuid, &vis->paging, gla, - in ? 
VM_PROT_WRITE : VM_PROT_READ, &vis->gpa); - KASSERT(error == 0 || error == 1 || error == -1, - ("%s: vmm_gla2gpa unexpected error %d", __func__, error)); - if (error == -1) { - return (EFAULT); - } else if (error == 1) { - return (0); /* Resume guest to handle page fault */ - } else { - *retu = true; - return (0); /* Return to userspace to finish emulation */ - } + *retu = true; + return (0); /* Return to userspace to finish emulation */ } int diff --git a/usr.sbin/bhyve/inout.c b/usr.sbin/bhyve/inout.c index e7cbd98b57c4..9df3ab47420a 100644 --- a/usr.sbin/bhyve/inout.c +++ b/usr.sbin/bhyve/inout.c @@ -102,14 +102,12 @@ int emulate_inout(struct vmctx *ctx, int vcpu, struct vm_exit *vmexit, int strict) { int addrsize, bytes, flags, in, port, rep; - uint64_t gpa, gpaend; uint32_t val; inout_func_t handler; void *arg; - char *gva; int error, retval; enum vm_reg_name idxreg; - uint64_t index, count; + uint64_t gla, index, count; struct vm_inout_str *vis; bytes = vmexit->u.inout.bytes; @@ -149,10 +147,6 @@ emulate_inout(struct vmctx *ctx, int vcpu, struct vm_exit *vmexit, int strict) /* Count register */ count = vis->count & vie_size2mask(addrsize); - gpa = vis->gpa; - gpaend = rounddown(gpa + PAGE_SIZE, PAGE_SIZE); - gva = paddr_guest2host(ctx, gpa, gpaend - gpa); - if (vie_alignment_check(vis->paging.cpl, bytes, vis->cr0, vis->rflags, vis->gla)) { error = vm_inject_exception2(ctx, vcpu, IDT_AC, 0); @@ -160,26 +154,34 @@ emulate_inout(struct vmctx *ctx, int vcpu, struct vm_exit *vmexit, int strict) return (INOUT_RESTART); } - while (count != 0 && gpa < gpaend) { - /* - * XXX this may not work for unaligned accesses because - * the last access on the page may spill over into the - * adjacent page in the linear address space. This is a - * problem because we don't have a gla2gpa() mapping of - * this adjacent page. 
- */ - assert(gpaend - gpa >= bytes); - + gla = vis->gla; + while (count) { val = 0; - if (!in) - bcopy(gva, &val, bytes); + if (!in) { + error = vm_copyin(ctx, vcpu, &vis->paging, + gla, &val, bytes); + assert(error == 0 || error == 1 || error == -1); + if (error) { + retval = (error == 1) ? INOUT_RESTART : + INOUT_ERROR; + break; + } + } retval = handler(ctx, vcpu, in, port, bytes, &val, arg); if (retval != 0) break; - if (in) - bcopy(&val, gva, bytes); + if (in) { + error = vm_copyout(ctx, vcpu, &vis->paging, + &val, gla, bytes); + assert(error == 0 || error == 1 || error == -1); + if (error) { + retval = (error == 1) ? INOUT_RESTART : + INOUT_ERROR; + break; + } + } /* Update index */ if (vis->rflags & PSL_D) @@ -188,8 +190,7 @@ emulate_inout(struct vmctx *ctx, int vcpu, struct vm_exit *vmexit, int strict) index += bytes; count--; - gva += bytes; - gpa += bytes; + gla += bytes; } /* Update index register */ -- cgit v1.3 From 6303b65d355a1d40a1b7a6de3f4988f9f8ee1723 Mon Sep 17 00:00:00 2001 From: Neel Natu Date: Mon, 26 May 2014 18:21:08 +0000 Subject: Fix issue with restarting an "insb/insw/insl" instruction because of a page fault on the destination buffer. Prior to this change a page fault would be detected in vm_copyout(). This was done after the I/O port access was done. If the I/O port access had side-effects (e.g. reading the uart FIFO) then restarting the instruction would result in incorrect behavior. Fix this by validating the guest linear address before doing the I/O port emulation. If the validation results in a page fault exception being injected into the guest then the instruction can now be restarted without any side-effects. 
--- lib/libvmmapi/vmmapi.c | 76 +++++++++++++++++++++++++++++++------------------- lib/libvmmapi/vmmapi.h | 16 ++++++++--- usr.sbin/bhyve/inout.c | 37 +++++++++++------------- 3 files changed, 76 insertions(+), 53 deletions(-) (limited to 'lib/libvmmapi') diff --git a/lib/libvmmapi/vmmapi.c b/lib/libvmmapi/vmmapi.c index 45fffcf4827a..ba2904c68856 100644 --- a/lib/libvmmapi/vmmapi.c +++ b/lib/libvmmapi/vmmapi.c @@ -33,6 +33,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include @@ -940,7 +941,7 @@ vm_get_hpet_capabilities(struct vmctx *ctx, uint32_t *capabilities) } static int -vm_gla2gpa(struct vmctx *ctx, int vcpu, struct vm_guest_paging *paging, +gla2gpa(struct vmctx *ctx, int vcpu, struct vm_guest_paging *paging, uint64_t gla, int prot, int *fault, uint64_t *gpa) { struct vm_gla2gpa gg; @@ -965,18 +966,20 @@ vm_gla2gpa(struct vmctx *ctx, int vcpu, struct vm_guest_paging *paging, #endif int -vm_copyin(struct vmctx *ctx, int vcpu, struct vm_guest_paging *paging, - uint64_t gla, void *vp, size_t len) +vm_gla2gpa(struct vmctx *ctx, int vcpu, struct vm_guest_paging *paging, + uint64_t gla, size_t len, int prot, struct iovec *iov, int iovcnt) { - char *dst; - const char *src; uint64_t gpa; - int error, fault, n, off; + int error, fault, i, n, off; + + for (i = 0; i < iovcnt; i++) { + iov[i].iov_base = 0; + iov[i].iov_len = 0; + } - dst = vp; while (len) { - error = vm_gla2gpa(ctx, vcpu, paging, gla, PROT_READ, - &fault, &gpa); + assert(iovcnt > 0); + error = gla2gpa(ctx, vcpu, paging, gla, prot, &fault, &gpa); if (error) return (-1); if (fault) @@ -984,42 +987,59 @@ vm_copyin(struct vmctx *ctx, int vcpu, struct vm_guest_paging *paging, off = gpa & PAGE_MASK; n = min(len, PAGE_SIZE - off); - src = vm_map_gpa(ctx, gpa, n); - bcopy(src, dst, n); + + iov->iov_base = (void *)gpa; + iov->iov_len = n; + iov++; + iovcnt--; gla += n; - dst += n; len -= n; } return (0); } -int -vm_copyout(struct vmctx *ctx, int vcpu, struct vm_guest_paging *paging, 
- const void *vp, uint64_t gla, size_t len) +void +vm_copyin(struct vmctx *ctx, int vcpu, struct iovec *iov, void *vp, size_t len) { - uint64_t gpa; + const char *src; char *dst; + uint64_t gpa; + size_t n; + + dst = vp; + while (len) { + assert(iov->iov_len); + gpa = (uint64_t)iov->iov_base; + n = min(len, iov->iov_len); + src = vm_map_gpa(ctx, gpa, n); + bcopy(src, dst, n); + + iov++; + dst += n; + len -= n; + } +} + +void +vm_copyout(struct vmctx *ctx, int vcpu, const void *vp, struct iovec *iov, + size_t len) +{ const char *src; - int error, fault, n, off; + char *dst; + uint64_t gpa; + size_t n; src = vp; while (len) { - error = vm_gla2gpa(ctx, vcpu, paging, gla, PROT_WRITE, - &fault, &gpa); - if (error) - return (-1); - if (fault) - return (1); - - off = gpa & PAGE_MASK; - n = min(len, PAGE_SIZE - off); + assert(iov->iov_len); + gpa = (uint64_t)iov->iov_base; + n = min(len, iov->iov_len); dst = vm_map_gpa(ctx, gpa, n); bcopy(src, dst, n); - gla += n; + iov++; src += n; len -= n; } - return (0); } diff --git a/lib/libvmmapi/vmmapi.h b/lib/libvmmapi/vmmapi.h index cad41c89ba0e..bab41da7a1a8 100644 --- a/lib/libvmmapi/vmmapi.h +++ b/lib/libvmmapi/vmmapi.h @@ -29,6 +29,7 @@ #ifndef _VMMAPI_H_ #define _VMMAPI_H_ +struct iovec; struct vmctx; enum x2apic_state; @@ -109,10 +110,17 @@ int vm_set_x2apic_state(struct vmctx *ctx, int vcpu, enum x2apic_state s); int vm_get_hpet_capabilities(struct vmctx *ctx, uint32_t *capabilities); -int vm_copyin(struct vmctx *ctx, int vcpu, struct vm_guest_paging *paging, - uint64_t gla_src, void *dst, size_t len); -int vm_copyout(struct vmctx *ctx, int vcpu, struct vm_guest_paging *paging, - const void *src, uint64_t gla_dst, size_t len); +/* + * Translate the GLA range [gla,gla+len) into GPA segments in 'iov'. + * The 'iovcnt' should be big enough to accommodate all GPA segments. + * Returns 0 on success, 1 on a guest fault condition and -1 otherwise.
+ */ +int vm_gla2gpa(struct vmctx *ctx, int vcpu, struct vm_guest_paging *paging, + uint64_t gla, size_t len, int prot, struct iovec *iov, int iovcnt); +void vm_copyin(struct vmctx *ctx, int vcpu, struct iovec *guest_iov, + void *host_dst, size_t len); +void vm_copyout(struct vmctx *ctx, int vcpu, const void *host_src, + struct iovec *guest_iov, size_t len); /* Reset vcpu register state */ int vcpu_reset(struct vmctx *ctx, int vcpu); diff --git a/usr.sbin/bhyve/inout.c b/usr.sbin/bhyve/inout.c index 7b22b46baffe..4aaa54aff847 100644 --- a/usr.sbin/bhyve/inout.c +++ b/usr.sbin/bhyve/inout.c @@ -31,6 +31,8 @@ __FBSDID("$FreeBSD$"); #include #include +#include +#include #include #include @@ -109,6 +111,7 @@ emulate_inout(struct vmctx *ctx, int vcpu, struct vm_exit *vmexit, int strict) enum vm_reg_name idxreg; uint64_t gla, index, count; struct vm_inout_str *vis; + struct iovec iov[2]; bytes = vmexit->u.inout.bytes; in = vmexit->u.inout.in; @@ -157,6 +160,15 @@ emulate_inout(struct vmctx *ctx, int vcpu, struct vm_exit *vmexit, int strict) return (INOUT_RESTART); } + error = vm_gla2gpa(ctx, vcpu, &vis->paging, gla, bytes, + in ? PROT_WRITE : PROT_READ, iov, nitems(iov)); + assert(error == 0 || error == 1 || error == -1); + if (error) { + retval = (error == 1) ? INOUT_RESTART : + INOUT_ERROR; + break; + } + if (vie_alignment_check(vis->paging.cpl, bytes, vis->cr0, vis->rflags, gla)) { error = vm_inject_exception2(ctx, vcpu, @@ -165,33 +177,16 @@ emulate_inout(struct vmctx *ctx, int vcpu, struct vm_exit *vmexit, int strict) return (INOUT_RESTART); } - val = 0; - if (!in) { - error = vm_copyin(ctx, vcpu, &vis->paging, - gla, &val, bytes); - assert(error == 0 || error == 1 || error == -1); - if (error) { - retval = (error == 1) ? 
INOUT_RESTART : - INOUT_ERROR; - break; - } - } + if (!in) + vm_copyin(ctx, vcpu, iov, &val, bytes); retval = handler(ctx, vcpu, in, port, bytes, &val, arg); if (retval != 0) break; - if (in) { - error = vm_copyout(ctx, vcpu, &vis->paging, - &val, gla, bytes); - assert(error == 0 || error == 1 || error == -1); - if (error) { - retval = (error == 1) ? INOUT_RESTART : - INOUT_ERROR; - break; - } - } + if (in) + vm_copyout(ctx, vcpu, &val, iov, bytes); /* Update index */ if (vis->rflags & PSL_D) -- cgit v1.3 From 95ebc360efc984cab758d634f1c357b73650f651 Mon Sep 17 00:00:00 2001 From: Neel Natu Date: Sat, 31 May 2014 23:37:34 +0000 Subject: Activate vcpus from bhyve(8) using the ioctl VM_ACTIVATE_CPU instead of doing it implicitly in vmm.ko. Add ioctl VM_GET_CPUS to get the current set of 'active' and 'suspended' cpus and display them via /usr/sbin/bhyvectl using the "--get-active-cpus" and "--get-suspended-cpus" options. This is in preparation for being able to reset virtual machine state without having to destroy and recreate it. 
--- lib/libvmmapi/vmmapi.c | 44 +++++++++++++++++++++++++++++++++++++++++++- lib/libvmmapi/vmmapi.h | 7 +++++++ sys/amd64/include/vmm.h | 3 ++- sys/amd64/include/vmm_dev.h | 20 ++++++++++++++++++++ sys/amd64/vmm/io/vlapic.c | 4 ---- sys/amd64/vmm/vmm.c | 29 ++++++++++++++++++++--------- sys/amd64/vmm/vmm_dev.c | 29 ++++++++++++++++++++++++++++- usr.sbin/bhyve/bhyverun.c | 13 +++++++++++++ usr.sbin/bhyve/pci_lpc.c | 1 - usr.sbin/bhyvectl/bhyvectl.c | 39 ++++++++++++++++++++++++++++++++++++++- 10 files changed, 171 insertions(+), 18 deletions(-) (limited to 'lib/libvmmapi') diff --git a/lib/libvmmapi/vmmapi.c b/lib/libvmmapi/vmmapi.c index ba2904c68856..89c782520bcc 100644 --- a/lib/libvmmapi/vmmapi.c +++ b/lib/libvmmapi/vmmapi.c @@ -29,11 +29,12 @@ #include __FBSDID("$FreeBSD$"); -#include +#include #include #include #include #include +#include #include #include @@ -1043,3 +1044,44 @@ vm_copyout(struct vmctx *ctx, int vcpu, const void *vp, struct iovec *iov, len -= n; } } + +static int +vm_get_cpus(struct vmctx *ctx, int which, cpuset_t *cpus) +{ + struct vm_cpuset vm_cpuset; + int error; + + bzero(&vm_cpuset, sizeof(struct vm_cpuset)); + vm_cpuset.which = which; + vm_cpuset.cpusetsize = sizeof(cpuset_t); + vm_cpuset.cpus = cpus; + + error = ioctl(ctx->fd, VM_GET_CPUS, &vm_cpuset); + return (error); +} + +int +vm_active_cpus(struct vmctx *ctx, cpuset_t *cpus) +{ + + return (vm_get_cpus(ctx, VM_ACTIVE_CPUS, cpus)); +} + +int +vm_suspended_cpus(struct vmctx *ctx, cpuset_t *cpus) +{ + + return (vm_get_cpus(ctx, VM_SUSPENDED_CPUS, cpus)); +} + +int +vm_activate_cpu(struct vmctx *ctx, int vcpu) +{ + struct vm_activate_cpu ac; + int error; + + bzero(&ac, sizeof(struct vm_activate_cpu)); + ac.vcpuid = vcpu; + error = ioctl(ctx->fd, VM_ACTIVATE_CPU, &ac); + return (error); +} diff --git a/lib/libvmmapi/vmmapi.h b/lib/libvmmapi/vmmapi.h index bab41da7a1a8..0f2e3ae57a49 100644 --- a/lib/libvmmapi/vmmapi.h +++ b/lib/libvmmapi/vmmapi.h @@ -29,6 +29,9 @@ #ifndef _VMMAPI_H_ 
#define _VMMAPI_H_ +#include +#include + struct iovec; struct vmctx; enum x2apic_state; @@ -125,6 +128,10 @@ void vm_copyout(struct vmctx *ctx, int vcpu, const void *host_src, /* Reset vcpu register state */ int vcpu_reset(struct vmctx *ctx, int vcpu); +int vm_active_cpus(struct vmctx *ctx, cpuset_t *cpus); +int vm_suspended_cpus(struct vmctx *ctx, cpuset_t *cpus); +int vm_activate_cpu(struct vmctx *ctx, int vcpu); + /* * FreeBSD specific APIs */ diff --git a/sys/amd64/include/vmm.h b/sys/amd64/include/vmm.h index f1902d2fd546..05df325a152a 100644 --- a/sys/amd64/include/vmm.h +++ b/sys/amd64/include/vmm.h @@ -140,8 +140,9 @@ int vm_set_capability(struct vm *vm, int vcpu, int type, int val); int vm_get_x2apic_state(struct vm *vm, int vcpu, enum x2apic_state *state); int vm_set_x2apic_state(struct vm *vm, int vcpu, enum x2apic_state state); int vm_apicid2vcpuid(struct vm *vm, int apicid); -void vm_activate_cpu(struct vm *vm, int vcpu); +int vm_activate_cpu(struct vm *vm, int vcpu); cpuset_t vm_active_cpus(struct vm *vm); +cpuset_t vm_suspended_cpus(struct vm *vm); struct vm_exit *vm_exitinfo(struct vm *vm, int vcpuid); void vm_exit_suspended(struct vm *vm, int vcpuid, uint64_t rip); diff --git a/sys/amd64/include/vmm_dev.h b/sys/amd64/include/vmm_dev.h index f094d519a413..a6568dc4e2f2 100644 --- a/sys/amd64/include/vmm_dev.h +++ b/sys/amd64/include/vmm_dev.h @@ -177,6 +177,18 @@ struct vm_gla2gpa { uint64_t gpa; }; +struct vm_activate_cpu { + int vcpuid; +}; + +struct vm_cpuset { + int which; + int cpusetsize; + cpuset_t *cpus; +}; +#define VM_ACTIVE_CPUS 0 +#define VM_SUSPENDED_CPUS 1 + enum { /* general routines */ IOCNUM_ABIVERS = 0, @@ -229,6 +241,10 @@ enum { IOCNUM_ISA_DEASSERT_IRQ = 81, IOCNUM_ISA_PULSE_IRQ = 82, IOCNUM_ISA_SET_IRQ_TRIGGER = 83, + + /* vm_cpuset */ + IOCNUM_ACTIVATE_CPU = 90, + IOCNUM_GET_CPUSET = 91, }; #define VM_RUN \ @@ -301,4 +317,8 @@ enum { _IOWR('v', IOCNUM_GET_GPA_PMAP, struct vm_gpa_pte) #define VM_GLA2GPA \ _IOWR('v', 
IOCNUM_GLA2GPA, struct vm_gla2gpa) +#define VM_ACTIVATE_CPU \ + _IOW('v', IOCNUM_ACTIVATE_CPU, struct vm_activate_cpu) +#define VM_GET_CPUS \ + _IOW('v', IOCNUM_GET_CPUSET, struct vm_cpuset) #endif diff --git a/sys/amd64/vmm/io/vlapic.c b/sys/amd64/vmm/io/vlapic.c index d93641c19b4d..4034d34ca756 100644 --- a/sys/amd64/vmm/io/vlapic.c +++ b/sys/amd64/vmm/io/vlapic.c @@ -1004,11 +1004,7 @@ vlapic_icrlo_write_handler(struct vlapic *vlapic, bool *retu) if (vlapic2->boot_state != BS_SIPI) return (0); - /* - * XXX this assumes that the startup IPI always succeeds - */ vlapic2->boot_state = BS_RUNNING; - vm_activate_cpu(vlapic2->vm, dest); *retu = true; vmexit = vm_exitinfo(vlapic->vm, vlapic->vcpuid); diff --git a/sys/amd64/vmm/vmm.c b/sys/amd64/vmm/vmm.c index 8ebdfd763801..e84359d6e696 100644 --- a/sys/amd64/vmm/vmm.c +++ b/sys/amd64/vmm/vmm.c @@ -342,8 +342,6 @@ vm_create(const char *name, struct vm **retvm) struct vm *vm; struct vmspace *vmspace; - const int BSP = 0; - /* * If vmm.ko could not be successfully initialized then don't attempt * to create the virtual machine. 
@@ -373,8 +371,6 @@ vm_create(const char *name, struct vm **retvm) guest_msrs_init(vm, i); } - vm_activate_cpu(vm, BSP); - *retvm = vm; return (0); } @@ -1294,6 +1290,12 @@ vm_run(struct vm *vm, struct vm_run *vmrun) if (vcpuid < 0 || vcpuid >= VM_MAXCPU) return (EINVAL); + if (!CPU_ISSET(vcpuid, &vm->active_cpus)) + return (EINVAL); + + if (CPU_ISSET(vcpuid, &vm->suspended_cpus)) + return (EINVAL); + rptr = &vm->rendezvous_func; sptr = &vm->suspend; pmap = vmspace_pmap(vm->vmspace); @@ -1708,17 +1710,19 @@ vcpu_get_state(struct vm *vm, int vcpuid, int *hostcpu) return (state); } -void +int vm_activate_cpu(struct vm *vm, int vcpuid) { - KASSERT(vcpuid >= 0 && vcpuid < VM_MAXCPU, - ("vm_activate_cpu: invalid vcpuid %d", vcpuid)); - KASSERT(!CPU_ISSET(vcpuid, &vm->active_cpus), - ("vm_activate_cpu: vcpuid %d is already active", vcpuid)); + if (vcpuid < 0 || vcpuid >= VM_MAXCPU) + return (EINVAL); + + if (CPU_ISSET(vcpuid, &vm->active_cpus)) + return (EBUSY); VCPU_CTR0(vm, vcpuid, "activated"); CPU_SET_ATOMIC(vcpuid, &vm->active_cpus); + return (0); } cpuset_t @@ -1728,6 +1732,13 @@ vm_active_cpus(struct vm *vm) return (vm->active_cpus); } +cpuset_t +vm_suspended_cpus(struct vm *vm) +{ + + return (vm->suspended_cpus); +} + void * vcpu_stats(struct vm *vm, int vcpuid) { diff --git a/sys/amd64/vmm/vmm_dev.c b/sys/amd64/vmm/vmm_dev.c index 05617853641c..824389f18a98 100644 --- a/sys/amd64/vmm/vmm_dev.c +++ b/sys/amd64/vmm/vmm_dev.c @@ -146,7 +146,8 @@ static int vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag, struct thread *td) { - int error, vcpu, state_changed; + int error, vcpu, state_changed, size; + cpuset_t *cpuset; struct vmmdev_softc *sc; struct vm_memory_segment *seg; struct vm_register *vmreg; @@ -170,6 +171,8 @@ vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag, struct vm_gpa_pte *gpapte; struct vm_suspend *vmsuspend; struct vm_gla2gpa *gg; + struct vm_activate_cpu *vac; + struct vm_cpuset *vm_cpuset; sc = 
vmmdev_lookup2(cdev); if (sc == NULL) @@ -195,6 +198,7 @@ vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag, case VM_PPTDEV_MSIX: case VM_SET_X2APIC_STATE: case VM_GLA2GPA: + case VM_ACTIVATE_CPU: /* * XXX fragile, handle with care * Assumes that the first field of the ioctl data is the vcpu. @@ -439,6 +443,29 @@ vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag, } break; } + case VM_ACTIVATE_CPU: + vac = (struct vm_activate_cpu *)data; + error = vm_activate_cpu(sc->vm, vac->vcpuid); + break; + case VM_GET_CPUS: + error = 0; + vm_cpuset = (struct vm_cpuset *)data; + size = vm_cpuset->cpusetsize; + if (size < sizeof(cpuset_t) || size > CPU_MAXSIZE / NBBY) { + error = ERANGE; + break; + } + cpuset = malloc(size, M_TEMP, M_WAITOK | M_ZERO); + if (vm_cpuset->which == VM_ACTIVE_CPUS) + *cpuset = vm_active_cpus(sc->vm); + else if (vm_cpuset->which == VM_SUSPENDED_CPUS) + *cpuset = vm_suspended_cpus(sc->vm); + else + error = EINVAL; + if (error == 0) + error = copyout(cpuset, vm_cpuset->cpus, size); + free(cpuset, M_TEMP); + break; default: error = ENOTTY; break; diff --git a/usr.sbin/bhyve/bhyverun.c b/usr.sbin/bhyve/bhyverun.c index f9a67cb691fc..1e5d3b33abd2 100644 --- a/usr.sbin/bhyve/bhyverun.c +++ b/usr.sbin/bhyve/bhyverun.c @@ -242,6 +242,15 @@ fbsdrun_addcpu(struct vmctx *ctx, int fromcpu, int newcpu, uint64_t rip) assert(fromcpu == BSP); + /* + * The 'newcpu' must be activated in the context of 'fromcpu'. If + * vm_activate_cpu() is delayed until newcpu's pthread starts running + * then vmm.ko is out-of-sync with bhyve and this can create a race + * with vm_suspend(). 
+ */ + error = vm_activate_cpu(ctx, newcpu); + assert(error == 0); + CPU_SET_ATOMIC(newcpu, &cpumask); /* @@ -532,6 +541,7 @@ vm_loop(struct vmctx *ctx, int vcpu, uint64_t rip) int error, rc, prevcpu; enum vm_exitcode exitcode; enum vm_suspend_how how; + cpuset_t active_cpus; if (vcpumap[vcpu] != NULL) { error = pthread_setaffinity_np(pthread_self(), @@ -539,6 +549,9 @@ vm_loop(struct vmctx *ctx, int vcpu, uint64_t rip) assert(error == 0); } + error = vm_active_cpus(ctx, &active_cpus); + assert(CPU_ISSET(vcpu, &active_cpus)); + while (1) { error = vm_run(ctx, vcpu, rip, &vmexit[vcpu]); if (error != 0) diff --git a/usr.sbin/bhyve/pci_lpc.c b/usr.sbin/bhyve/pci_lpc.c index 6b61b7afd0a3..e98b1411dfcd 100644 --- a/usr.sbin/bhyve/pci_lpc.c +++ b/usr.sbin/bhyve/pci_lpc.c @@ -32,7 +32,6 @@ __FBSDID("$FreeBSD$"); #include #include -#include #include #include diff --git a/usr.sbin/bhyvectl/bhyvectl.c b/usr.sbin/bhyvectl/bhyvectl.c index ceee33a2aeeb..e77f0d77df6f 100644 --- a/usr.sbin/bhyvectl/bhyvectl.c +++ b/usr.sbin/bhyvectl/bhyvectl.c @@ -193,7 +193,9 @@ usage(void) " [--assert-lapic-lvt=]\n" " [--inject-nmi]\n" " [--force-reset]\n" - " [--force-poweroff]\n", + " [--force-poweroff]\n" + " [--get-active-cpus]\n" + " [--get-suspended-cpus]\n", progname); exit(1); } @@ -203,6 +205,7 @@ static int inject_nmi, assert_lapic_lvt; static int force_reset, force_poweroff; static const char *capname; static int create, destroy, get_lowmem, get_highmem; +static int get_active_cpus, get_suspended_cpus; static uint64_t memsize; static int set_cr0, get_cr0, set_cr3, get_cr3, set_cr4, get_cr4; static int set_efer, get_efer; @@ -390,6 +393,25 @@ enum { ASSERT_LAPIC_LVT, }; +static void +print_cpus(const char *banner, const cpuset_t *cpus) +{ + int i, first; + + first = 1; + printf("%s:\t", banner); + if (!CPU_EMPTY(cpus)) { + for (i = 0; i < CPU_SETSIZE; i++) { + if (CPU_ISSET(i, cpus)) { + printf("%s%d", first ? 
" " : ", ", i); + first = 0; + } + } + } else + printf(" (none)"); + printf("\n"); +} + int main(int argc, char *argv[]) { @@ -401,6 +423,7 @@ main(int argc, char *argv[]) uint64_t ctl, eptp, bm, addr, u64, pteval[4], *pte; struct vmctx *ctx; int wired; + cpuset_t cpus; uint64_t cr0, cr3, cr4, dr7, rsp, rip, rflags, efer, pat; uint64_t rax, rbx, rcx, rdx, rsi, rdi, rbp; @@ -570,6 +593,8 @@ main(int argc, char *argv[]) { "inject-nmi", NO_ARG, &inject_nmi, 1 }, { "force-reset", NO_ARG, &force_reset, 1 }, { "force-poweroff", NO_ARG, &force_poweroff, 1 }, + { "get-active-cpus", NO_ARG, &get_active_cpus, 1 }, + { "get-suspended-cpus", NO_ARG, &get_suspended_cpus, 1 }, { NULL, 0, NULL, 0 } }; @@ -1529,6 +1554,18 @@ main(int argc, char *argv[]) } } + if (!error && (get_active_cpus || get_all)) { + error = vm_active_cpus(ctx, &cpus); + if (!error) + print_cpus("active cpus", &cpus); + } + + if (!error && (get_suspended_cpus || get_all)) { + error = vm_suspended_cpus(ctx, &cpus); + if (!error) + print_cpus("suspended cpus", &cpus); + } + if (!error && run) { error = vm_get_register(ctx, vcpu, VM_REG_GUEST_RIP, &rip); assert(error == 0); -- cgit v1.3 From 5fcf252f410e7784626d6d5d0e75042be23d4f24 Mon Sep 17 00:00:00 2001 From: Neel Natu Date: Sat, 7 Jun 2014 21:36:52 +0000 Subject: Add ioctl(VM_REINIT) to reinitialize the virtual machine state maintained by vmm.ko. This allows the virtual machine to be restarted without having to destroy it first. 
Reviewed by: grehan --- lib/libvmmapi/vmmapi.c | 7 ++ lib/libvmmapi/vmmapi.h | 1 + sys/amd64/include/vmm.h | 1 + sys/amd64/include/vmm_dev.h | 3 + sys/amd64/vmm/vmm.c | 204 ++++++++++++++++++++++++++--------------- sys/amd64/vmm/vmm_dev.c | 4 + sys/amd64/vmm/vmm_stat.c | 16 +++- sys/amd64/vmm/vmm_stat.h | 5 +- usr.sbin/bhyveload/bhyveload.c | 21 ++++- 9 files changed, 177 insertions(+), 85 deletions(-) (limited to 'lib/libvmmapi') diff --git a/lib/libvmmapi/vmmapi.c b/lib/libvmmapi/vmmapi.c index 89c782520bcc..5ce3d8e7ae82 100644 --- a/lib/libvmmapi/vmmapi.c +++ b/lib/libvmmapi/vmmapi.c @@ -367,6 +367,13 @@ vm_suspend(struct vmctx *ctx, enum vm_suspend_how how) return (ioctl(ctx->fd, VM_SUSPEND, &vmsuspend)); } +int +vm_reinit(struct vmctx *ctx) +{ + + return (ioctl(ctx->fd, VM_REINIT, 0)); +} + static int vm_inject_exception_real(struct vmctx *ctx, int vcpu, int vector, int error_code, int error_code_valid) diff --git a/lib/libvmmapi/vmmapi.h b/lib/libvmmapi/vmmapi.h index 0f2e3ae57a49..4cc429065e46 100644 --- a/lib/libvmmapi/vmmapi.h +++ b/lib/libvmmapi/vmmapi.h @@ -69,6 +69,7 @@ int vm_get_register(struct vmctx *ctx, int vcpu, int reg, uint64_t *retval); int vm_run(struct vmctx *ctx, int vcpu, uint64_t rip, struct vm_exit *ret_vmexit); int vm_suspend(struct vmctx *ctx, enum vm_suspend_how how); +int vm_reinit(struct vmctx *ctx); int vm_apicid2vcpu(struct vmctx *ctx, int apicid); int vm_inject_exception(struct vmctx *ctx, int vcpu, int vec); int vm_inject_exception2(struct vmctx *ctx, int vcpu, int vec, int errcode); diff --git a/sys/amd64/include/vmm.h b/sys/amd64/include/vmm.h index 05df325a152a..00e1d96afe7f 100644 --- a/sys/amd64/include/vmm.h +++ b/sys/amd64/include/vmm.h @@ -105,6 +105,7 @@ extern struct vmm_ops vmm_ops_amd; int vm_create(const char *name, struct vm **retvm); void vm_destroy(struct vm *vm); +int vm_reinit(struct vm *vm); const char *vm_name(struct vm *vm); int vm_malloc(struct vm *vm, vm_paddr_t gpa, size_t len); int vm_map_mmio(struct vm 
*vm, vm_paddr_t gpa, size_t len, vm_paddr_t hpa); diff --git a/sys/amd64/include/vmm_dev.h b/sys/amd64/include/vmm_dev.h index a6568dc4e2f2..9b3b00ded0ba 100644 --- a/sys/amd64/include/vmm_dev.h +++ b/sys/amd64/include/vmm_dev.h @@ -196,6 +196,7 @@ enum { IOCNUM_SET_CAPABILITY = 2, IOCNUM_GET_CAPABILITY = 3, IOCNUM_SUSPEND = 4, + IOCNUM_REINIT = 5, /* memory apis */ IOCNUM_MAP_MEMORY = 10, @@ -251,6 +252,8 @@ enum { _IOWR('v', IOCNUM_RUN, struct vm_run) #define VM_SUSPEND \ _IOW('v', IOCNUM_SUSPEND, struct vm_suspend) +#define VM_REINIT \ + _IO('v', IOCNUM_REINIT) #define VM_MAP_MEMORY \ _IOWR('v', IOCNUM_MAP_MEMORY, struct vm_memory_segment) #define VM_GET_MEMORY_SEG \ diff --git a/sys/amd64/vmm/vmm.c b/sys/amd64/vmm/vmm.c index e84359d6e696..435ba391e141 100644 --- a/sys/amd64/vmm/vmm.c +++ b/sys/amd64/vmm/vmm.c @@ -84,25 +84,31 @@ __FBSDID("$FreeBSD$"); struct vlapic; +/* + * Initialization: + * (a) allocated when vcpu is created + * (i) initialized when vcpu is created and when it is reinitialized + * (o) initialized the first time the vcpu is created + * (x) initialized before use + */ struct vcpu { - int flags; - enum vcpu_state state; - struct mtx mtx; - int hostcpu; /* host cpuid this vcpu last ran on */ - uint64_t guest_msrs[VMM_MSR_NUM]; - struct vlapic *vlapic; - int vcpuid; - struct savefpu *guestfpu; /* guest fpu state */ - uint64_t guest_xcr0; - void *stats; - struct vm_exit exitinfo; - enum x2apic_state x2apic_state; - int nmi_pending; - int extint_pending; - struct vm_exception exception; - int exception_pending; + struct mtx mtx; /* (o) protects 'state' and 'hostcpu' */ + enum vcpu_state state; /* (o) vcpu state */ + int hostcpu; /* (o) vcpu's host cpu */ + struct vlapic *vlapic; /* (i) APIC device model */ + enum x2apic_state x2apic_state; /* (i) APIC mode */ + int nmi_pending; /* (i) NMI pending */ + int extint_pending; /* (i) INTR pending */ + struct vm_exception exception; /* (x) exception collateral */ + int exception_pending; /* (i) exception 
pending */ + struct savefpu *guestfpu; /* (a,i) guest fpu state */ + uint64_t guest_xcr0; /* (i) guest %xcr0 register */ + void *stats; /* (a,i) statistics */ + uint64_t guest_msrs[VMM_MSR_NUM]; /* (i) emulated MSRs */ + struct vm_exit exitinfo; /* (x) exit reason and collateral */ }; +#define vcpu_lock_initialized(v) mtx_initialized(&((v)->mtx)) #define vcpu_lock_init(v) mtx_init(&((v)->mtx), "vcpu lock", 0, MTX_SPIN) #define vcpu_lock(v) mtx_lock_spin(&((v)->mtx)) #define vcpu_unlock(v) mtx_unlock_spin(&((v)->mtx)) @@ -116,36 +122,33 @@ struct mem_seg { }; #define VM_MAX_MEMORY_SEGMENTS 2 +/* + * Initialization: + * (o) initialized the first time the VM is created + * (i) initialized when VM is created and when it is reinitialized + * (x) initialized before use + */ struct vm { - void *cookie; /* processor-specific data */ - void *iommu; /* iommu-specific data */ - struct vhpet *vhpet; /* virtual HPET */ - struct vioapic *vioapic; /* virtual ioapic */ - struct vatpic *vatpic; /* virtual atpic */ - struct vatpit *vatpit; /* virtual atpit */ - struct vmspace *vmspace; /* guest's address space */ - struct vcpu vcpu[VM_MAXCPU]; - int num_mem_segs; - struct mem_seg mem_segs[VM_MAX_MEMORY_SEGMENTS]; - char name[VM_MAX_NAMELEN]; - - /* - * Set of active vcpus. - * An active vcpu is one that has been started implicitly (BSP) or - * explicitly (AP) by sending it a startup ipi. 
- */ - volatile cpuset_t active_cpus; - - struct mtx rendezvous_mtx; - cpuset_t rendezvous_req_cpus; - cpuset_t rendezvous_done_cpus; - void *rendezvous_arg; + void *cookie; /* (i) cpu-specific data */ + void *iommu; /* (x) iommu-specific data */ + struct vhpet *vhpet; /* (i) virtual HPET */ + struct vioapic *vioapic; /* (i) virtual ioapic */ + struct vatpic *vatpic; /* (i) virtual atpic */ + struct vatpit *vatpit; /* (i) virtual atpit */ + volatile cpuset_t active_cpus; /* (i) active vcpus */ + int suspend; /* (i) stop VM execution */ + volatile cpuset_t suspended_cpus; /* (i) suspended vcpus */ + volatile cpuset_t halted_cpus; /* (x) cpus in a hard halt */ + cpuset_t rendezvous_req_cpus; /* (x) rendezvous requested */ + cpuset_t rendezvous_done_cpus; /* (x) rendezvous finished */ + void *rendezvous_arg; /* (x) rendezvous func/arg */ vm_rendezvous_func_t rendezvous_func; - - int suspend; - volatile cpuset_t suspended_cpus; - - volatile cpuset_t halted_cpus; + struct mtx rendezvous_mtx; /* (o) rendezvous lock */ + int num_mem_segs; /* (o) guest memory segments */ + struct mem_seg mem_segs[VM_MAX_MEMORY_SEGMENTS]; + struct vmspace *vmspace; /* (o) guest's address space */ + char name[VM_MAX_NAMELEN]; /* (o) virtual machine name */ + struct vcpu vcpu[VM_MAXCPU]; /* (i) guest vcpus */ }; static int vmm_initialized; @@ -206,31 +209,46 @@ SYSCTL_INT(_hw_vmm, OID_AUTO, ipinum, CTLFLAG_RD, &vmm_ipinum, 0, "IPI vector used for vcpu notifications"); static void -vcpu_cleanup(struct vm *vm, int i) +vcpu_cleanup(struct vm *vm, int i, bool destroy) { struct vcpu *vcpu = &vm->vcpu[i]; VLAPIC_CLEANUP(vm->cookie, vcpu->vlapic); - vmm_stat_free(vcpu->stats); - fpu_save_area_free(vcpu->guestfpu); + if (destroy) { + vmm_stat_free(vcpu->stats); + fpu_save_area_free(vcpu->guestfpu); + } } static void -vcpu_init(struct vm *vm, uint32_t vcpu_id) +vcpu_init(struct vm *vm, int vcpu_id, bool create) { struct vcpu *vcpu; - + + KASSERT(vcpu_id >= 0 && vcpu_id < VM_MAXCPU, + ("vcpu_init: 
invalid vcpu %d", vcpu_id)); + vcpu = &vm->vcpu[vcpu_id]; - vcpu_lock_init(vcpu); - vcpu->hostcpu = NOCPU; - vcpu->vcpuid = vcpu_id; + if (create) { + KASSERT(!vcpu_lock_initialized(vcpu), ("vcpu %d already " + "initialized", vcpu_id)); + vcpu_lock_init(vcpu); + vcpu->state = VCPU_IDLE; + vcpu->hostcpu = NOCPU; + vcpu->guestfpu = fpu_save_area_alloc(); + vcpu->stats = vmm_stat_alloc(); + } + vcpu->vlapic = VLAPIC_INIT(vm->cookie, vcpu_id); vm_set_x2apic_state(vm, vcpu_id, X2APIC_DISABLED); + vcpu->nmi_pending = 0; + vcpu->extint_pending = 0; + vcpu->exception_pending = 0; vcpu->guest_xcr0 = XFEATURE_ENABLED_X87; - vcpu->guestfpu = fpu_save_area_alloc(); fpu_save_area_reset(vcpu->guestfpu); - vcpu->stats = vmm_stat_alloc(); + vmm_stat_init(vcpu->stats); + guest_msrs_init(vm, vcpu_id); } struct vm_exit * @@ -335,10 +353,30 @@ static moduledata_t vmm_kmod = { DECLARE_MODULE(vmm, vmm_kmod, SI_SUB_SMP + 1, SI_ORDER_ANY); MODULE_VERSION(vmm, 1); +static void +vm_init(struct vm *vm, bool create) +{ + int i; + + vm->cookie = VMINIT(vm, vmspace_pmap(vm->vmspace)); + vm->iommu = NULL; + vm->vioapic = vioapic_init(vm); + vm->vhpet = vhpet_init(vm); + vm->vatpic = vatpic_init(vm); + vm->vatpit = vatpit_init(vm); + + CPU_ZERO(&vm->active_cpus); + + vm->suspend = 0; + CPU_ZERO(&vm->suspended_cpus); + + for (i = 0; i < VM_MAXCPU; i++) + vcpu_init(vm, i, create); +} + int vm_create(const char *name, struct vm **retvm) { - int i; struct vm *vm; struct vmspace *vmspace; @@ -358,18 +396,11 @@ vm_create(const char *name, struct vm **retvm) vm = malloc(sizeof(struct vm), M_VM, M_WAITOK | M_ZERO); strcpy(vm->name, name); + vm->num_mem_segs = 0; vm->vmspace = vmspace; mtx_init(&vm->rendezvous_mtx, "vm rendezvous lock", 0, MTX_DEF); - vm->cookie = VMINIT(vm, vmspace_pmap(vmspace)); - vm->vioapic = vioapic_init(vm); - vm->vhpet = vhpet_init(vm); - vm->vatpic = vatpic_init(vm); - vm->vatpit = vatpit_init(vm); - for (i = 0; i < VM_MAXCPU; i++) { - vcpu_init(vm, i); - guest_msrs_init(vm, i); 
- } + vm_init(vm, true); *retvm = vm; return (0); @@ -385,8 +416,8 @@ vm_free_mem_seg(struct vm *vm, struct mem_seg *seg) bzero(seg, sizeof(*seg)); } -void -vm_destroy(struct vm *vm) +static void +vm_cleanup(struct vm *vm, bool destroy) { int i; @@ -400,21 +431,48 @@ vm_destroy(struct vm *vm) vatpic_cleanup(vm->vatpic); vioapic_cleanup(vm->vioapic); - for (i = 0; i < vm->num_mem_segs; i++) - vm_free_mem_seg(vm, &vm->mem_segs[i]); + for (i = 0; i < VM_MAXCPU; i++) + vcpu_cleanup(vm, i, destroy); - vm->num_mem_segs = 0; + VMCLEANUP(vm->cookie); - for (i = 0; i < VM_MAXCPU; i++) - vcpu_cleanup(vm, i); + if (destroy) { + for (i = 0; i < vm->num_mem_segs; i++) + vm_free_mem_seg(vm, &vm->mem_segs[i]); - VMSPACE_FREE(vm->vmspace); + vm->num_mem_segs = 0; - VMCLEANUP(vm->cookie); + VMSPACE_FREE(vm->vmspace); + vm->vmspace = NULL; + } +} +void +vm_destroy(struct vm *vm) +{ + vm_cleanup(vm, true); free(vm, M_VM); } +int +vm_reinit(struct vm *vm) +{ + int error; + + /* + * A virtual machine can be reset only if all vcpus are suspended. + */ + if (CPU_CMP(&vm->suspended_cpus, &vm->active_cpus) == 0) { + vm_cleanup(vm, false); + vm_init(vm, false); + error = 0; + } else { + error = EBUSY; + } + + return (error); +} + const char * vm_name(struct vm *vm) { diff --git a/sys/amd64/vmm/vmm_dev.c b/sys/amd64/vmm/vmm_dev.c index 824389f18a98..f3e31a33df4a 100644 --- a/sys/amd64/vmm/vmm_dev.c +++ b/sys/amd64/vmm/vmm_dev.c @@ -220,6 +220,7 @@ vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag, case VM_BIND_PPTDEV: case VM_UNBIND_PPTDEV: case VM_MAP_MEMORY: + case VM_REINIT: /* * ioctls that operate on the entire virtual machine must * prevent all vcpus from running. 
@@ -253,6 +254,9 @@ vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag, vmsuspend = (struct vm_suspend *)data; error = vm_suspend(sc->vm, vmsuspend->how); break; + case VM_REINIT: + error = vm_reinit(sc->vm); + break; case VM_STAT_DESC: { statdesc = (struct vm_stat_desc *)data; error = vmm_stat_desc_copy(statdesc->index, diff --git a/sys/amd64/vmm/vmm_stat.c b/sys/amd64/vmm/vmm_stat.c index e3d699923c0f..ef9f41173fee 100644 --- a/sys/amd64/vmm/vmm_stat.c +++ b/sys/amd64/vmm/vmm_stat.c @@ -52,8 +52,10 @@ static struct vmm_stat_type *vsttab[MAX_VMM_STAT_ELEMS]; static MALLOC_DEFINE(M_VMM_STAT, "vmm stat", "vmm stat"); +#define vst_size ((size_t)vst_num_elems * sizeof(uint64_t)) + void -vmm_stat_init(void *arg) +vmm_stat_register(void *arg) { struct vmm_stat_type *vst = arg; @@ -97,11 +99,15 @@ vmm_stat_copy(struct vm *vm, int vcpu, int *num_stats, uint64_t *buf) void * vmm_stat_alloc(void) { - u_long size; - - size = vst_num_elems * sizeof(uint64_t); - return (malloc(size, M_VMM_STAT, M_ZERO | M_WAITOK)); + return (malloc(vst_size, M_VMM_STAT, M_WAITOK)); +} + +void +vmm_stat_init(void *vp) +{ + + bzero(vp, vst_size); } void diff --git a/sys/amd64/vmm/vmm_stat.h b/sys/amd64/vmm/vmm_stat.h index 9110c8f0f0ed..6e98965ac270 100644 --- a/sys/amd64/vmm/vmm_stat.h +++ b/sys/amd64/vmm/vmm_stat.h @@ -49,13 +49,13 @@ struct vmm_stat_type { enum vmm_stat_scope scope; }; -void vmm_stat_init(void *arg); +void vmm_stat_register(void *arg); #define VMM_STAT_DEFINE(type, nelems, desc, scope) \ struct vmm_stat_type type[1] = { \ { -1, nelems, desc, scope } \ }; \ - SYSINIT(type##_stat, SI_SUB_KLD, SI_ORDER_ANY, vmm_stat_init, type) + SYSINIT(type##_stat, SI_SUB_KLD, SI_ORDER_ANY, vmm_stat_register, type) #define VMM_STAT_DECLARE(type) \ extern struct vmm_stat_type type[1] @@ -71,6 +71,7 @@ void vmm_stat_init(void *arg); VMM_STAT_DEFINE(type, nelems, desc, VMM_STAT_SCOPE_ANY) void *vmm_stat_alloc(void); +void vmm_stat_init(void *vp); void vmm_stat_free(void *vp); /* 
diff --git a/usr.sbin/bhyveload/bhyveload.c b/usr.sbin/bhyveload/bhyveload.c index c1a54326360b..44424963fed0 100644 --- a/usr.sbin/bhyveload/bhyveload.c +++ b/usr.sbin/bhyveload/bhyveload.c @@ -642,7 +642,7 @@ main(int argc, char** argv) void *h; void (*func)(struct loader_callbacks *, void *, int, int); uint64_t mem_size; - int opt, error; + int opt, error, need_reinit; progname = basename(argv[0]); @@ -691,11 +691,14 @@ main(int argc, char** argv) vmname = argv[0]; + need_reinit = 0; error = vm_create(vmname); - if (error != 0 && errno != EEXIST) { - perror("vm_create"); - exit(1); - + if (error) { + if (errno != EEXIST) { + perror("vm_create"); + exit(1); + } + need_reinit = 1; } ctx = vm_open(vmname); @@ -704,6 +707,14 @@ main(int argc, char** argv) exit(1); } + if (need_reinit) { + error = vm_reinit(ctx); + if (error) { + perror("vm_reinit"); + exit(1); + } + } + error = vm_setup_memory(ctx, mem_size, VM_MMAP_ALL); if (error) { perror("vm_setup_memory"); -- cgit v1.3 From be679db4cd69a64d8810c513c2cbea2e6edf0e27 Mon Sep 17 00:00:00 2001 From: Neel Natu Date: Tue, 24 Jun 2014 02:02:51 +0000 Subject: Provide APIs to get the 'lowmem' and 'highmem' sizes directly. Previously the sizes were inferred indirectly based on the size of the mappings at 0 and 4GB respectively. This works fine as long as the size of the allocation is identical to the size of the mapping in the guest's address space. However, if the mapping is disjoint then this assumption falls apart (e.g., due to the legacy BIOS hole between 640KB and 1MB).
--- lib/libvmmapi/vmmapi.c | 14 ++++++++++++++ lib/libvmmapi/vmmapi.h | 2 ++ usr.sbin/bhyve/pci_emul.c | 3 +-- usr.sbin/bhyve/rtc.c | 15 +++++---------- usr.sbin/bhyve/smbiostbl.c | 9 ++------- usr.sbin/bhyveload/bhyveload.c | 4 ++-- 6 files changed, 26 insertions(+), 21 deletions(-) (limited to 'lib/libvmmapi') diff --git a/lib/libvmmapi/vmmapi.c b/lib/libvmmapi/vmmapi.c index 5ce3d8e7ae82..9fb2308731e7 100644 --- a/lib/libvmmapi/vmmapi.c +++ b/lib/libvmmapi/vmmapi.c @@ -274,6 +274,20 @@ vm_map_gpa(struct vmctx *ctx, vm_paddr_t gaddr, size_t len) return (NULL); } +size_t +vm_get_lowmem_size(struct vmctx *ctx) +{ + + return (ctx->lowmem); +} + +size_t +vm_get_highmem_size(struct vmctx *ctx) +{ + + return (ctx->highmem); +} + int vm_set_desc(struct vmctx *ctx, int vcpu, int reg, uint64_t base, uint32_t limit, uint32_t access) diff --git a/lib/libvmmapi/vmmapi.h b/lib/libvmmapi/vmmapi.h index 4cc429065e46..067eaa0aa26c 100644 --- a/lib/libvmmapi/vmmapi.h +++ b/lib/libvmmapi/vmmapi.h @@ -60,6 +60,8 @@ int vm_get_gpa_pmap(struct vmctx *, uint64_t gpa, uint64_t *pte, int *num); uint32_t vm_get_lowmem_limit(struct vmctx *ctx); void vm_set_lowmem_limit(struct vmctx *ctx, uint32_t limit); void vm_set_memflags(struct vmctx *ctx, int flags); +size_t vm_get_lowmem_size(struct vmctx *ctx); +size_t vm_get_highmem_size(struct vmctx *ctx); int vm_set_desc(struct vmctx *ctx, int vcpu, int reg, uint64_t base, uint32_t limit, uint32_t access); int vm_get_desc(struct vmctx *ctx, int vcpu, int reg, diff --git a/usr.sbin/bhyve/pci_emul.c b/usr.sbin/bhyve/pci_emul.c index a2c47ec0509c..458ba76480b1 100644 --- a/usr.sbin/bhyve/pci_emul.c +++ b/usr.sbin/bhyve/pci_emul.c @@ -1118,8 +1118,7 @@ init_pci(struct vmctx *ctx) * Accesses to memory addresses that are not allocated to system * memory or PCI devices return 0xff's. 
*/ - error = vm_get_memory_seg(ctx, 0, &lowmem, NULL); - assert(error == 0); + lowmem = vm_get_lowmem_size(ctx); memset(&pci_mem_hole, 0, sizeof(struct mem_range)); pci_mem_hole.name = "PCI hole"; diff --git a/usr.sbin/bhyve/rtc.c b/usr.sbin/bhyve/rtc.c index 11877479807c..b3631fc07228 100644 --- a/usr.sbin/bhyve/rtc.c +++ b/usr.sbin/bhyve/rtc.c @@ -343,19 +343,14 @@ rtc_init(struct vmctx *ctx) * 0x34/0x35 - 64KB chunks above 16MB, below 4GB * 0x5b/0x5c/0x5d - 64KB chunks above 4GB */ - err = vm_get_memory_seg(ctx, 0, &lomem, NULL); - assert(err == 0); - - lomem = (lomem - m_16MB) / m_64KB; + lomem = (vm_get_lowmem_size(ctx) - m_16MB) / m_64KB; rtc_nvram[nvoff(RTC_LMEM_LSB)] = lomem; rtc_nvram[nvoff(RTC_LMEM_MSB)] = lomem >> 8; - if (vm_get_memory_seg(ctx, m_4GB, &himem, NULL) == 0) { - himem /= m_64KB; - rtc_nvram[nvoff(RTC_HMEM_LSB)] = himem; - rtc_nvram[nvoff(RTC_HMEM_SB)] = himem >> 8; - rtc_nvram[nvoff(RTC_HMEM_MSB)] = himem >> 16; - } + himem = vm_get_highmem_size(ctx) / m_64KB; + rtc_nvram[nvoff(RTC_HMEM_LSB)] = himem; + rtc_nvram[nvoff(RTC_HMEM_SB)] = himem >> 8; + rtc_nvram[nvoff(RTC_HMEM_MSB)] = himem >> 16; } INOUT_PORT(rtc, IO_RTC, IOPORT_F_INOUT, rtc_addr_handler); diff --git a/usr.sbin/bhyve/smbiostbl.c b/usr.sbin/bhyve/smbiostbl.c index 9d1cfb3198be..d560f022fc2a 100644 --- a/usr.sbin/bhyve/smbiostbl.c +++ b/usr.sbin/bhyve/smbiostbl.c @@ -779,13 +779,8 @@ smbios_build(struct vmctx *ctx) int i; int err; - err = vm_get_memory_seg(ctx, 0, &guest_lomem, NULL); - if (err != 0) - return (err); - - err = vm_get_memory_seg(ctx, 4*GB, &guest_himem, NULL); - if (err != 0) - return (err); + guest_lomem = vm_get_lowmem_size(ctx); + guest_himem = vm_get_highmem_size(ctx); startaddr = paddr_guest2host(ctx, SMBIOS_BASE, SMBIOS_MAX_LENGTH); if (startaddr == NULL) { diff --git a/usr.sbin/bhyveload/bhyveload.c b/usr.sbin/bhyveload/bhyveload.c index 44424963fed0..ff6b26926f3b 100644 --- a/usr.sbin/bhyveload/bhyveload.c +++ b/usr.sbin/bhyveload/bhyveload.c @@ -505,8 
+505,8 @@ static void cb_getmem(void *arg, uint64_t *ret_lowmem, uint64_t *ret_highmem) { - vm_get_memory_seg(ctx, 0, ret_lowmem, NULL); - vm_get_memory_seg(ctx, 4 * GB, ret_highmem, NULL); + *ret_lowmem = vm_get_lowmem_size(ctx); + *ret_highmem = vm_get_highmem_size(ctx); } struct env { -- cgit v1.3 From 091d453222c352732e496226ffceb33c0b165f56 Mon Sep 17 00:00:00 2001 From: Neel Natu Date: Sat, 19 Jul 2014 20:59:08 +0000 Subject: Handle nested exceptions in bhyve. A nested exception condition arises when a second exception is triggered while delivering the first exception. Most nested exceptions can be handled serially but some are converted into a double fault. If an exception is generated during delivery of a double fault then the virtual machine shuts down as a result of a triple fault. vm_exit_intinfo() is used to record that a VM-exit happened while an event was being delivered through the IDT. If an exception is triggered while handling the VM-exit it will be treated like a nested exception. vm_entry_intinfo() is used by processor-specific code to get the event to be injected into the guest on the next VM-entry. This function is responsible for deciding the disposition of nested exceptions. 
--- lib/libvmmapi/vmmapi.c | 29 ++++++ lib/libvmmapi/vmmapi.h | 3 + sys/amd64/include/vmm.h | 40 ++++++-- sys/amd64/include/vmm_dev.h | 12 +++ sys/amd64/vmm/intel/vmx.c | 122 ++++++++++++++--------- sys/amd64/vmm/vmm.c | 225 +++++++++++++++++++++++++++++++++++++++---- sys/amd64/vmm/vmm_dev.c | 12 +++ usr.sbin/bhyve/bhyverun.c | 2 + usr.sbin/bhyve/task_switch.c | 10 +- usr.sbin/bhyvectl/bhyvectl.c | 46 ++++++++- 10 files changed, 424 insertions(+), 77 deletions(-) (limited to 'lib/libvmmapi') diff --git a/lib/libvmmapi/vmmapi.c b/lib/libvmmapi/vmmapi.c index 9fb2308731e7..483aa5199b6c 100644 --- a/lib/libvmmapi/vmmapi.c +++ b/lib/libvmmapi/vmmapi.c @@ -1106,3 +1106,32 @@ vm_activate_cpu(struct vmctx *ctx, int vcpu) error = ioctl(ctx->fd, VM_ACTIVATE_CPU, &ac); return (error); } + +int +vm_get_intinfo(struct vmctx *ctx, int vcpu, uint64_t *info1, uint64_t *info2) +{ + struct vm_intinfo vmii; + int error; + + bzero(&vmii, sizeof(struct vm_intinfo)); + vmii.vcpuid = vcpu; + error = ioctl(ctx->fd, VM_GET_INTINFO, &vmii); + if (error == 0) { + *info1 = vmii.info1; + *info2 = vmii.info2; + } + return (error); +} + +int +vm_set_intinfo(struct vmctx *ctx, int vcpu, uint64_t info1) +{ + struct vm_intinfo vmii; + int error; + + bzero(&vmii, sizeof(struct vm_intinfo)); + vmii.vcpuid = vcpu; + vmii.info1 = info1; + error = ioctl(ctx->fd, VM_SET_INTINFO, &vmii); + return (error); +} diff --git a/lib/libvmmapi/vmmapi.h b/lib/libvmmapi/vmmapi.h index 067eaa0aa26c..2040c91e205f 100644 --- a/lib/libvmmapi/vmmapi.h +++ b/lib/libvmmapi/vmmapi.h @@ -104,6 +104,9 @@ int vm_setup_pptdev_msix(struct vmctx *ctx, int vcpu, int bus, int slot, int func, int idx, uint64_t addr, uint64_t msg, uint32_t vector_control); +int vm_get_intinfo(struct vmctx *ctx, int vcpu, uint64_t *i1, uint64_t *i2); +int vm_set_intinfo(struct vmctx *ctx, int vcpu, uint64_t exit_intinfo); + /* * Return a pointer to the statistics buffer. Note that this is not MT-safe. 
*/ diff --git a/sys/amd64/include/vmm.h b/sys/amd64/include/vmm.h index 9c05b894968b..6895e64037e0 100644 --- a/sys/amd64/include/vmm.h +++ b/sys/amd64/include/vmm.h @@ -34,6 +34,7 @@ enum vm_suspend_how { VM_SUSPEND_RESET, VM_SUSPEND_POWEROFF, VM_SUSPEND_HALT, + VM_SUSPEND_TRIPLEFAULT, VM_SUSPEND_LAST }; @@ -88,6 +89,16 @@ enum x2apic_state { X2APIC_STATE_LAST }; +#define VM_INTINFO_VECTOR(info) ((info) & 0xff) +#define VM_INTINFO_DEL_ERRCODE 0x800 +#define VM_INTINFO_RSVD 0x7ffff000 +#define VM_INTINFO_VALID 0x80000000 +#define VM_INTINFO_TYPE 0x700 +#define VM_INTINFO_HWINTR (0 << 8) +#define VM_INTINFO_NMI (2 << 8) +#define VM_INTINFO_HWEXCEPTION (3 << 8) +#define VM_INTINFO_SWINTR (4 << 8) + #ifdef _KERNEL #define VM_MAX_NAMELEN 32 @@ -278,14 +289,31 @@ struct vatpit *vm_atpit(struct vm *vm); int vm_inject_exception(struct vm *vm, int vcpuid, struct vm_exception *vme); /* - * Returns 0 if there is no exception pending for this vcpu. Returns 1 if an - * exception is pending and also updates 'vme'. The pending exception is - * cleared when this function returns. + * This function is called after a VM-exit that occurred during exception or + * interrupt delivery through the IDT. The format of 'intinfo' is described + * in Figure 15-1, "EXITINTINFO for All Intercepts", APM, Vol 2. * - * This function should only be called in the context of the thread that is - * executing this vcpu. + * If a VM-exit handler completes the event delivery successfully then it + * should call vm_exit_intinfo() to extinguish the pending event. For example, + * if the task switch emulation is triggered via a task gate then it should + * call this function with 'intinfo=0' to indicate that the external event + * is not pending anymore. + * + * Return value is 0 on success and non-zero on failure.
*/ -int vm_exception_pending(struct vm *vm, int vcpuid, struct vm_exception *vme); +int vm_exit_intinfo(struct vm *vm, int vcpuid, uint64_t intinfo); + +/* + * This function is called before every VM-entry to retrieve a pending + * event that should be injected into the guest. This function combines + * nested events into a double or triple fault. + * + * Returns 0 if there are no events that need to be injected into the guest + * and non-zero otherwise. + */ +int vm_entry_intinfo(struct vm *vm, int vcpuid, uint64_t *info); + +int vm_get_intinfo(struct vm *vm, int vcpuid, uint64_t *info1, uint64_t *info2); void vm_inject_gp(struct vm *vm, int vcpuid); /* general protection fault */ void vm_inject_ud(struct vm *vm, int vcpuid); /* undefined instruction fault */ diff --git a/sys/amd64/include/vmm_dev.h b/sys/amd64/include/vmm_dev.h index 9b3b00ded0ba..e4d839ef6549 100644 --- a/sys/amd64/include/vmm_dev.h +++ b/sys/amd64/include/vmm_dev.h @@ -189,6 +189,12 @@ struct vm_cpuset { #define VM_ACTIVE_CPUS 0 #define VM_SUSPENDED_CPUS 1 +struct vm_intinfo { + int vcpuid; + uint64_t info1; + uint64_t info2; +}; + enum { /* general routines */ IOCNUM_ABIVERS = 0, @@ -211,6 +217,8 @@ enum { IOCNUM_GET_SEGMENT_DESCRIPTOR = 23, /* interrupt injection */ + IOCNUM_GET_INTINFO = 28, + IOCNUM_SET_INTINFO = 29, IOCNUM_INJECT_EXCEPTION = 30, IOCNUM_LAPIC_IRQ = 31, IOCNUM_INJECT_NMI = 32, @@ -324,4 +332,8 @@ enum { _IOW('v', IOCNUM_ACTIVATE_CPU, struct vm_activate_cpu) #define VM_GET_CPUS \ _IOW('v', IOCNUM_GET_CPUSET, struct vm_cpuset) +#define VM_SET_INTINFO \ + _IOW('v', IOCNUM_SET_INTINFO, struct vm_intinfo) +#define VM_GET_INTINFO \ + _IOWR('v', IOCNUM_GET_INTINFO, struct vm_intinfo) #endif diff --git a/sys/amd64/vmm/intel/vmx.c b/sys/amd64/vmm/intel/vmx.c index 271f8ce173e2..22732a276e8d 100644 --- a/sys/amd64/vmm/intel/vmx.c +++ b/sys/amd64/vmm/intel/vmx.c @@ -1213,22 +1213,31 @@ vmx_inject_interrupts(struct vmx *vmx, int vcpu, struct vlapic *vlapic) { struct vm_exception exc; 
int vector, need_nmi_exiting, extint_pending; - uint64_t rflags; + uint64_t rflags, entryinfo; uint32_t gi, info; - if (vm_exception_pending(vmx->vm, vcpu, &exc)) { - KASSERT(exc.vector >= 0 && exc.vector < 32, - ("%s: invalid exception vector %d", __func__, exc.vector)); + if (vm_entry_intinfo(vmx->vm, vcpu, &entryinfo)) { + KASSERT((entryinfo & VMCS_INTR_VALID) != 0, ("%s: entry " + "intinfo is not valid: %#lx", __func__, entryinfo)); info = vmcs_read(VMCS_ENTRY_INTR_INFO); KASSERT((info & VMCS_INTR_VALID) == 0, ("%s: cannot inject " "pending exception %d: %#x", __func__, exc.vector, info)); - info = exc.vector | VMCS_INTR_T_HWEXCEPTION | VMCS_INTR_VALID; - if (exc.error_code_valid) { - info |= VMCS_INTR_DEL_ERRCODE; - vmcs_write(VMCS_ENTRY_EXCEPTION_ERROR, exc.error_code); + info = entryinfo; + vector = info & 0xff; + if (vector == IDT_BP || vector == IDT_OF) { + /* + * VT-x requires #BP and #OF to be injected as software + * exceptions. + */ + info &= ~VMCS_INTR_T_MASK; + info |= VMCS_INTR_T_SWEXCEPTION; } + + if (info & VMCS_INTR_DEL_ERRCODE) + vmcs_write(VMCS_ENTRY_EXCEPTION_ERROR, entryinfo >> 32); + vmcs_write(VMCS_ENTRY_INTR_INFO, info); } @@ -1407,6 +1416,16 @@ vmx_clear_nmi_blocking(struct vmx *vmx, int vcpuid) vmcs_write(VMCS_GUEST_INTERRUPTIBILITY, gi); } +static void +vmx_assert_nmi_blocking(struct vmx *vmx, int vcpuid) +{ + uint32_t gi; + + gi = vmcs_read(VMCS_GUEST_INTERRUPTIBILITY); + KASSERT(gi & VMCS_INTERRUPTIBILITY_NMI_BLOCKING, + ("NMI blocking is not in effect %#x", gi)); +} + static int vmx_emulate_xsetbv(struct vmx *vmx, int vcpu, struct vm_exit *vmexit) { @@ -2050,7 +2069,7 @@ vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit) struct vm_task_switch *ts; uint32_t eax, ecx, edx, idtvec_info, idtvec_err, intr_info, inst_info; uint32_t intr_type, reason; - uint64_t qual, gpa; + uint64_t exitintinfo, qual, gpa; bool retu; CTASSERT((PINBASED_CTLS_ONE_SETTING & PINBASED_VIRTUAL_NMI) != 0); @@ -2070,47 +2089,49 @@ 
vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit) * be handled specially by re-injecting the event if the IDT * vectoring information field's valid bit is set. * - * If the VM-exit is due to a task gate in the IDT then we don't - * reinject the event because emulating the task switch also - * completes the event delivery. - * * See "Information for VM Exits During Event Delivery" in Intel SDM * for details. */ - switch (reason) { - case EXIT_REASON_EPT_FAULT: - case EXIT_REASON_EPT_MISCONFIG: - case EXIT_REASON_APIC_ACCESS: - case EXIT_REASON_TASK_SWITCH: - case EXIT_REASON_EXCEPTION: - idtvec_info = vmcs_idt_vectoring_info(); - VCPU_CTR2(vmx->vm, vcpu, "vm exit %s: idtvec_info 0x%08x", - exit_reason_to_str(reason), idtvec_info); - if ((idtvec_info & VMCS_IDT_VEC_VALID) && - (reason != EXIT_REASON_TASK_SWITCH)) { - idtvec_info &= ~(1 << 12); /* clear undefined bit */ - vmcs_write(VMCS_ENTRY_INTR_INFO, idtvec_info); - if (idtvec_info & VMCS_IDT_VEC_ERRCODE_VALID) { - idtvec_err = vmcs_idt_vectoring_err(); - vmcs_write(VMCS_ENTRY_EXCEPTION_ERROR, - idtvec_err); - } - /* - * If 'virtual NMIs' are being used and the VM-exit - * happened while injecting an NMI during the previous - * VM-entry, then clear "blocking by NMI" in the Guest - * Interruptibility-state. 
- */ - if ((idtvec_info & VMCS_INTR_T_MASK) == - VMCS_INTR_T_NMI) { - vmx_clear_nmi_blocking(vmx, vcpu); - } + idtvec_info = vmcs_idt_vectoring_info(); + if (idtvec_info & VMCS_IDT_VEC_VALID) { + idtvec_info &= ~(1 << 12); /* clear undefined bit */ + exitintinfo = idtvec_info; + if (idtvec_info & VMCS_IDT_VEC_ERRCODE_VALID) { + idtvec_err = vmcs_idt_vectoring_err(); + exitintinfo |= (uint64_t)idtvec_err << 32; + } + error = vm_exit_intinfo(vmx->vm, vcpu, exitintinfo); + KASSERT(error == 0, ("%s: vm_set_intinfo error %d", + __func__, error)); + + /* + * If 'virtual NMIs' are being used and the VM-exit + * happened while injecting an NMI during the previous + * VM-entry, then clear "blocking by NMI" in the + * Guest Interruptibility-State so the NMI can be + * reinjected on the subsequent VM-entry. + * + * However, if the NMI was being delivered through a task + * gate, then the new task must start execution with NMIs + * blocked so don't clear NMI blocking in this case. + */ + intr_type = idtvec_info & VMCS_INTR_T_MASK; + if (intr_type == VMCS_INTR_T_NMI) { + if (reason != EXIT_REASON_TASK_SWITCH) + vmx_clear_nmi_blocking(vmx, vcpu); + else + vmx_assert_nmi_blocking(vmx, vcpu); + } + + /* + * Update VM-entry instruction length if the event being + * delivered was a software interrupt or software exception. 
+ */ + if (intr_type == VMCS_INTR_T_SWINTR || + intr_type == VMCS_INTR_T_PRIV_SWEXCEPTION || + intr_type == VMCS_INTR_T_SWEXCEPTION) { vmcs_write(VMCS_ENTRY_INST_LENGTH, vmexit->inst_length); } - break; - default: - idtvec_info = 0; - break; } switch (reason) { @@ -2136,7 +2157,7 @@ vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit) */ if (ts->reason == TSR_IDT_GATE) { KASSERT(idtvec_info & VMCS_IDT_VEC_VALID, - ("invalid idtvec_info %x for IDT task switch", + ("invalid idtvec_info %#x for IDT task switch", idtvec_info)); intr_type = idtvec_info & VMCS_INTR_T_MASK; if (intr_type != VMCS_INTR_T_SWINTR && @@ -2302,6 +2323,7 @@ vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit) * the guest. * * See "Resuming Guest Software after Handling an Exception". + * See "Information for VM Exits Due to Vectored Events". */ if ((idtvec_info & VMCS_IDT_VEC_VALID) == 0 && (intr_info & 0xff) != IDT_DF && @@ -2519,6 +2541,13 @@ vmx_run(void *arg, int vcpu, register_t startrip, pmap_t pmap, * pmap_invalidate_ept(). */ disable_intr(); + vmx_inject_interrupts(vmx, vcpu, vlapic); + + /* + * Check for vcpu suspension after injecting events because + * vmx_inject_interrupts() can suspend the vcpu due to a + * triple fault. 
+ */ if (vcpu_suspended(suspend_cookie)) { enable_intr(); vm_exit_suspended(vmx->vm, vcpu, vmcs_guest_rip()); @@ -2539,7 +2568,6 @@ vmx_run(void *arg, int vcpu, register_t startrip, pmap_t pmap, break; } - vmx_inject_interrupts(vmx, vcpu, vlapic); vmx_run_trace(vmx, vcpu); rc = vmx_enter_guest(vmxctx, vmx, launched); diff --git a/sys/amd64/vmm/vmm.c b/sys/amd64/vmm/vmm.c index d1d9d5a52a63..25042546b39e 100644 --- a/sys/amd64/vmm/vmm.c +++ b/sys/amd64/vmm/vmm.c @@ -97,6 +97,7 @@ struct vcpu { int hostcpu; /* (o) vcpu's host cpu */ struct vlapic *vlapic; /* (i) APIC device model */ enum x2apic_state x2apic_state; /* (i) APIC mode */ + uint64_t exitintinfo; /* (i) events pending at VM exit */ int nmi_pending; /* (i) NMI pending */ int extint_pending; /* (i) INTR pending */ struct vm_exception exception; /* (x) exception collateral */ @@ -241,6 +242,7 @@ vcpu_init(struct vm *vm, int vcpu_id, bool create) vcpu->vlapic = VLAPIC_INIT(vm->cookie, vcpu_id); vm_set_x2apic_state(vm, vcpu_id, X2APIC_DISABLED); + vcpu->exitintinfo = 0; vcpu->nmi_pending = 0; vcpu->extint_pending = 0; vcpu->exception_pending = 0; @@ -1457,6 +1459,202 @@ restart: return (error); } +int +vm_exit_intinfo(struct vm *vm, int vcpuid, uint64_t info) +{ + struct vcpu *vcpu; + int type, vector; + + if (vcpuid < 0 || vcpuid >= VM_MAXCPU) + return (EINVAL); + + vcpu = &vm->vcpu[vcpuid]; + + if (info & VM_INTINFO_VALID) { + type = info & VM_INTINFO_TYPE; + vector = info & 0xff; + if (type == VM_INTINFO_NMI && vector != IDT_NMI) + return (EINVAL); + if (type == VM_INTINFO_HWEXCEPTION && vector >= 32) + return (EINVAL); + if (info & VM_INTINFO_RSVD) + return (EINVAL); + } else { + info = 0; + } + VCPU_CTR2(vm, vcpuid, "%s: info1(%#lx)", __func__, info); + vcpu->exitintinfo = info; + return (0); +} + +enum exc_class { + EXC_BENIGN, + EXC_CONTRIBUTORY, + EXC_PAGEFAULT +}; + +#define IDT_VE 20 /* Virtualization Exception (Intel specific) */ + +static enum exc_class +exception_class(uint64_t info) +{ + int type, 
vector; + + KASSERT(info & VM_INTINFO_VALID, ("intinfo must be valid: %#lx", info)); + type = info & VM_INTINFO_TYPE; + vector = info & 0xff; + + /* Table 6-4, "Interrupt and Exception Classes", Intel SDM, Vol 3 */ + switch (type) { + case VM_INTINFO_HWINTR: + case VM_INTINFO_SWINTR: + case VM_INTINFO_NMI: + return (EXC_BENIGN); + default: + /* + * Hardware exception. + * + * SVM and VT-x use identical type values to represent NMI, + * hardware interrupt and software interrupt. + * + * SVM uses type '3' for all exceptions. VT-x uses type '3' + * for exceptions except #BP and #OF. #BP and #OF use a type + * value of '5' or '6'. Therefore we don't check for explicit + * values of 'type' to classify 'intinfo' into a hardware + * exception. + */ + break; + } + + switch (vector) { + case IDT_PF: + case IDT_VE: + return (EXC_PAGEFAULT); + case IDT_DE: + case IDT_TS: + case IDT_NP: + case IDT_SS: + case IDT_GP: + return (EXC_CONTRIBUTORY); + default: + return (EXC_BENIGN); + } +} + +static int +nested_fault(struct vm *vm, int vcpuid, uint64_t info1, uint64_t info2, + uint64_t *retinfo) +{ + enum exc_class exc1, exc2; + int type1, vector1; + + KASSERT(info1 & VM_INTINFO_VALID, ("info1 %#lx is not valid", info1)); + KASSERT(info2 & VM_INTINFO_VALID, ("info2 %#lx is not valid", info2)); + + /* + * If an exception occurs while attempting to call the double-fault + * handler the processor enters shutdown mode (aka triple fault). 
+ */ + type1 = info1 & VM_INTINFO_TYPE; + vector1 = info1 & 0xff; + if (type1 == VM_INTINFO_HWEXCEPTION && vector1 == IDT_DF) { + VCPU_CTR2(vm, vcpuid, "triple fault: info1(%#lx), info2(%#lx)", + info1, info2); + vm_suspend(vm, VM_SUSPEND_TRIPLEFAULT); + *retinfo = 0; + return (0); + } + + /* + * Table 6-5 "Conditions for Generating a Double Fault", Intel SDM, Vol3 + */ + exc1 = exception_class(info1); + exc2 = exception_class(info2); + if ((exc1 == EXC_CONTRIBUTORY && exc2 == EXC_CONTRIBUTORY) || + (exc1 == EXC_PAGEFAULT && exc2 != EXC_BENIGN)) { + /* Convert nested fault into a double fault. */ + *retinfo = IDT_DF; + *retinfo |= VM_INTINFO_VALID | VM_INTINFO_HWEXCEPTION; + *retinfo |= VM_INTINFO_DEL_ERRCODE; + } else { + /* Handle exceptions serially */ + *retinfo = info2; + } + return (1); +} + +static uint64_t +vcpu_exception_intinfo(struct vcpu *vcpu) +{ + uint64_t info = 0; + + if (vcpu->exception_pending) { + info = vcpu->exception.vector & 0xff; + info |= VM_INTINFO_VALID | VM_INTINFO_HWEXCEPTION; + if (vcpu->exception.error_code_valid) { + info |= VM_INTINFO_DEL_ERRCODE; + info |= (uint64_t)vcpu->exception.error_code << 32; + } + } + return (info); +} + +int +vm_entry_intinfo(struct vm *vm, int vcpuid, uint64_t *retinfo) +{ + struct vcpu *vcpu; + uint64_t info1, info2; + int valid; + + KASSERT(vcpuid >= 0 && vcpuid < VM_MAXCPU, ("invalid vcpu %d", vcpuid)); + + vcpu = &vm->vcpu[vcpuid]; + + info1 = vcpu->exitintinfo; + vcpu->exitintinfo = 0; + + info2 = 0; + if (vcpu->exception_pending) { + info2 = vcpu_exception_intinfo(vcpu); + vcpu->exception_pending = 0; + VCPU_CTR2(vm, vcpuid, "Exception %d delivered: %#lx", + vcpu->exception.vector, info2); + } + + if ((info1 & VM_INTINFO_VALID) && (info2 & VM_INTINFO_VALID)) { + valid = nested_fault(vm, vcpuid, info1, info2, retinfo); + } else if (info1 & VM_INTINFO_VALID) { + *retinfo = info1; + valid = 1; + } else if (info2 & VM_INTINFO_VALID) { + *retinfo = info2; + valid = 1; + } else { + valid = 0; + } + + if 
(valid) { + VCPU_CTR4(vm, vcpuid, "%s: info1(%#lx), info2(%#lx), " + "retinfo(%#lx)", __func__, info1, info2, *retinfo); + } + + return (valid); +} + +int +vm_get_intinfo(struct vm *vm, int vcpuid, uint64_t *info1, uint64_t *info2) +{ + struct vcpu *vcpu; + + if (vcpuid < 0 || vcpuid >= VM_MAXCPU) + return (EINVAL); + + vcpu = &vm->vcpu[vcpuid]; + *info1 = vcpu->exitintinfo; + *info2 = vcpu_exception_intinfo(vcpu); + return (0); +} + int vm_inject_exception(struct vm *vm, int vcpuid, struct vm_exception *exception) { @@ -1468,6 +1666,14 @@ vm_inject_exception(struct vm *vm, int vcpuid, struct vm_exception *exception) if (exception->vector < 0 || exception->vector >= 32) return (EINVAL); + /* + * A double fault exception should never be injected directly into + * the guest. It is a derived exception that results from specific + * combinations of nested faults. + */ + if (exception->vector == IDT_DF) + return (EINVAL); + vcpu = &vm->vcpu[vcpuid]; if (vcpu->exception_pending) { @@ -1483,25 +1689,6 @@ vm_inject_exception(struct vm *vm, int vcpuid, struct vm_exception *exception) return (0); } -int -vm_exception_pending(struct vm *vm, int vcpuid, struct vm_exception *exception) -{ - struct vcpu *vcpu; - int pending; - - KASSERT(vcpuid >= 0 && vcpuid < VM_MAXCPU, ("invalid vcpu %d", vcpuid)); - - vcpu = &vm->vcpu[vcpuid]; - pending = vcpu->exception_pending; - if (pending) { - vcpu->exception_pending = 0; - *exception = vcpu->exception; - VCPU_CTR1(vm, vcpuid, "Exception %d delivered", - exception->vector); - } - return (pending); -} - static void vm_inject_fault(struct vm *vm, int vcpuid, struct vm_exception *exception) { diff --git a/sys/amd64/vmm/vmm_dev.c b/sys/amd64/vmm/vmm_dev.c index f3e31a33df4a..a85109edaa1d 100644 --- a/sys/amd64/vmm/vmm_dev.c +++ b/sys/amd64/vmm/vmm_dev.c @@ -173,6 +173,7 @@ vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag, struct vm_gla2gpa *gg; struct vm_activate_cpu *vac; struct vm_cpuset *vm_cpuset; + struct vm_intinfo 
*vmii; sc = vmmdev_lookup2(cdev); if (sc == NULL) @@ -199,6 +200,8 @@ vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag, case VM_SET_X2APIC_STATE: case VM_GLA2GPA: case VM_ACTIVATE_CPU: + case VM_SET_INTINFO: + case VM_GET_INTINFO: /* * XXX fragile, handle with care * Assumes that the first field of the ioctl data is the vcpu. @@ -470,6 +473,15 @@ vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag, error = copyout(cpuset, vm_cpuset->cpus, size); free(cpuset, M_TEMP); break; + case VM_SET_INTINFO: + vmii = (struct vm_intinfo *)data; + error = vm_exit_intinfo(sc->vm, vmii->vcpuid, vmii->info1); + break; + case VM_GET_INTINFO: + vmii = (struct vm_intinfo *)data; + error = vm_get_intinfo(sc->vm, vmii->vcpuid, &vmii->info1, + &vmii->info2); + break; default: error = ENOTTY; break; diff --git a/usr.sbin/bhyve/bhyverun.c b/usr.sbin/bhyve/bhyverun.c index 457ec513a0d4..2b95d9cf880a 100644 --- a/usr.sbin/bhyve/bhyverun.c +++ b/usr.sbin/bhyve/bhyverun.c @@ -534,6 +534,8 @@ vmexit_suspend(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) exit(1); case VM_SUSPEND_HALT: exit(2); + case VM_SUSPEND_TRIPLEFAULT: + exit(3); default: fprintf(stderr, "vmexit_suspend: invalid reason %d\n", how); exit(100); diff --git a/usr.sbin/bhyve/task_switch.c b/usr.sbin/bhyve/task_switch.c index b2f5bedb862f..e946807aa8e5 100644 --- a/usr.sbin/bhyve/task_switch.c +++ b/usr.sbin/bhyve/task_switch.c @@ -904,10 +904,14 @@ vmexit_task_switch(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) */ /* - * XXX is the original task switch was triggered by a hardware - * exception then do we generate a double-fault if we encounter - * an exception during the task switch? + * If the task switch was triggered by an event delivered through + * the IDT then extinguish the pending event from the vcpu's + * exitintinfo. 
*/ + if (task_switch->reason == TSR_IDT_GATE) { + error = vm_set_intinfo(ctx, vcpu, 0); + assert(error == 0); + } /* * XXX should inject debug exception if 'T' bit is 1 diff --git a/usr.sbin/bhyvectl/bhyvectl.c b/usr.sbin/bhyvectl/bhyvectl.c index e77f0d77df6f..b6006b72a767 100644 --- a/usr.sbin/bhyvectl/bhyvectl.c +++ b/usr.sbin/bhyvectl/bhyvectl.c @@ -195,7 +195,8 @@ usage(void) " [--force-reset]\n" " [--force-poweroff]\n" " [--get-active-cpus]\n" - " [--get-suspended-cpus]\n", + " [--get-suspended-cpus]\n" + " [--get-intinfo]\n", progname); exit(1); } @@ -205,6 +206,7 @@ static int inject_nmi, assert_lapic_lvt; static int force_reset, force_poweroff; static const char *capname; static int create, destroy, get_lowmem, get_highmem; +static int get_intinfo; static int get_active_cpus, get_suspended_cpus; static uint64_t memsize; static int set_cr0, get_cr0, set_cr3, get_cr3, set_cr4, get_cr4; @@ -412,6 +414,37 @@ print_cpus(const char *banner, const cpuset_t *cpus) printf("\n"); } +static void +print_intinfo(const char *banner, uint64_t info) +{ + int type; + + printf("%s:\t", banner); + if (info & VM_INTINFO_VALID) { + type = info & VM_INTINFO_TYPE; + switch (type) { + case VM_INTINFO_HWINTR: + printf("extint"); + break; + case VM_INTINFO_NMI: + printf("nmi"); + break; + case VM_INTINFO_SWINTR: + printf("swint"); + break; + default: + printf("exception"); + break; + } + printf(" vector %d", (int)VM_INTINFO_VECTOR(info)); + if (info & VM_INTINFO_DEL_ERRCODE) + printf(" errcode %#x", (u_int)(info >> 32)); + } else { + printf("n/a"); + } + printf("\n"); +} + int main(int argc, char *argv[]) { @@ -420,7 +453,7 @@ main(int argc, char *argv[]) vm_paddr_t gpa, gpa_pmap; size_t len; struct vm_exit vmexit; - uint64_t ctl, eptp, bm, addr, u64, pteval[4], *pte; + uint64_t ctl, eptp, bm, addr, u64, pteval[4], *pte, info[2]; struct vmctx *ctx; int wired; cpuset_t cpus; @@ -595,6 +628,7 @@ main(int argc, char *argv[]) { "force-poweroff", NO_ARG, &force_poweroff, 1 }, { 
"get-active-cpus", NO_ARG, &get_active_cpus, 1 }, { "get-suspended-cpus", NO_ARG, &get_suspended_cpus, 1 }, + { "get-intinfo", NO_ARG, &get_intinfo, 1 }, { NULL, 0, NULL, 0 } }; @@ -1566,6 +1600,14 @@ main(int argc, char *argv[]) print_cpus("suspended cpus", &cpus); } + if (!error && (get_intinfo || get_all)) { + error = vm_get_intinfo(ctx, vcpu, &info[0], &info[1]); + if (!error) { + print_intinfo("pending", info[0]); + print_intinfo("current", info[1]); + } + } + if (!error && run) { error = vm_get_register(ctx, vcpu, VM_REG_GUEST_RIP, &rip); assert(error == 0); -- cgit v1.3 From d665d229cef8b8617a89e94898a4e8d770aedd34 Mon Sep 17 00:00:00 2001 From: Neel Natu Date: Wed, 23 Jul 2014 04:28:51 +0000 Subject: Emulate instructions emitted by OpenBSD/i386 version 5.5: - CMP REG, r/m - MOV AX/EAX/RAX, moffset - MOV moffset, AX/EAX/RAX - PUSH r/m --- lib/libvmmapi/vmmapi.c | 40 +++- lib/libvmmapi/vmmapi.h | 9 +- sys/amd64/include/vmm.h | 32 +++ sys/amd64/include/vmm_instruction_emul.h | 4 +- sys/amd64/vmm/vmm.c | 119 ++++++++++- sys/amd64/vmm/vmm_instruction_emul.c | 356 ++++++++++++++++++++++++++----- usr.sbin/bhyve/bhyverun.c | 8 +- usr.sbin/bhyve/inout.c | 12 +- usr.sbin/bhyve/mem.c | 8 +- usr.sbin/bhyve/mem.h | 3 +- usr.sbin/bhyve/task_switch.c | 15 +- 11 files changed, 516 insertions(+), 90 deletions(-) (limited to 'lib/libvmmapi') diff --git a/lib/libvmmapi/vmmapi.c b/lib/libvmmapi/vmmapi.c index 483aa5199b6c..087d0b789f11 100644 --- a/lib/libvmmapi/vmmapi.c +++ b/lib/libvmmapi/vmmapi.c @@ -36,6 +36,7 @@ __FBSDID("$FreeBSD$"); #include #include +#include #include #include @@ -326,6 +327,16 @@ vm_get_desc(struct vmctx *ctx, int vcpu, int reg, return (error); } +int +vm_get_seg_desc(struct vmctx *ctx, int vcpu, int reg, struct seg_desc *seg_desc) +{ + int error; + + error = vm_get_desc(ctx, vcpu, reg, &seg_desc->base, &seg_desc->limit, + &seg_desc->access); + return (error); +} + int vm_set_register(struct vmctx *ctx, int vcpu, int reg, uint64_t val) { @@ -988,7 
+999,7 @@ gla2gpa(struct vmctx *ctx, int vcpu, struct vm_guest_paging *paging, #endif int -vm_gla2gpa(struct vmctx *ctx, int vcpu, struct vm_guest_paging *paging, +vm_copy_setup(struct vmctx *ctx, int vcpu, struct vm_guest_paging *paging, uint64_t gla, size_t len, int prot, struct iovec *iov, int iovcnt) { uint64_t gpa; @@ -1135,3 +1146,30 @@ vm_set_intinfo(struct vmctx *ctx, int vcpu, uint64_t info1) error = ioctl(ctx->fd, VM_SET_INTINFO, &vmii); return (error); } + +void +vm_inject_ss(struct vmctx *ctx, int vcpu, int errcode) +{ + int error; + + error = vm_inject_exception2(ctx, vcpu, IDT_SS, errcode); + assert(error == 0); +} + +void +vm_inject_ac(struct vmctx *ctx, int vcpu, int errcode) +{ + int error; + + error = vm_inject_exception2(ctx, vcpu, IDT_AC, errcode); + assert(error == 0); +} + +void +vm_inject_gp(struct vmctx *ctx, int vcpu, int errcode) +{ + int error; + + error = vm_inject_exception2(ctx, vcpu, IDT_GP, errcode); + assert(error == 0); +} diff --git a/lib/libvmmapi/vmmapi.h b/lib/libvmmapi/vmmapi.h index 2040c91e205f..72d75c002676 100644 --- a/lib/libvmmapi/vmmapi.h +++ b/lib/libvmmapi/vmmapi.h @@ -66,6 +66,8 @@ int vm_set_desc(struct vmctx *ctx, int vcpu, int reg, uint64_t base, uint32_t limit, uint32_t access); int vm_get_desc(struct vmctx *ctx, int vcpu, int reg, uint64_t *base, uint32_t *limit, uint32_t *access); +int vm_get_seg_desc(struct vmctx *ctx, int vcpu, int reg, + struct seg_desc *seg_desc); int vm_set_register(struct vmctx *ctx, int vcpu, int reg, uint64_t val); int vm_get_register(struct vmctx *ctx, int vcpu, int reg, uint64_t *retval); int vm_run(struct vmctx *ctx, int vcpu, uint64_t rip, @@ -124,13 +126,18 @@ int vm_get_hpet_capabilities(struct vmctx *ctx, uint32_t *capabilities); * The 'iovcnt' should be big enough to accomodate all GPA segments. * Returns 0 on success, 1 on a guest fault condition and -1 otherwise. 
*/ -int vm_gla2gpa(struct vmctx *ctx, int vcpu, struct vm_guest_paging *paging, +int vm_copy_setup(struct vmctx *ctx, int vcpu, struct vm_guest_paging *pg, uint64_t gla, size_t len, int prot, struct iovec *iov, int iovcnt); void vm_copyin(struct vmctx *ctx, int vcpu, struct iovec *guest_iov, void *host_dst, size_t len); void vm_copyout(struct vmctx *ctx, int vcpu, const void *host_src, struct iovec *guest_iov, size_t len); +/* Helper functions to inject exceptions */ +void vm_inject_ss(struct vmctx *ctx, int vcpu, int errcode); +void vm_inject_ac(struct vmctx *ctx, int vcpu, int errcode); +void vm_inject_gp(struct vmctx *ctx, int vcpu, int errcode); + /* Reset vcpu register state */ int vcpu_reset(struct vmctx *ctx, int vcpu); diff --git a/sys/amd64/include/vmm.h b/sys/amd64/include/vmm.h index 6895e64037e0..6f476b4afa32 100644 --- a/sys/amd64/include/vmm.h +++ b/sys/amd64/include/vmm.h @@ -114,6 +114,7 @@ struct vioapic; struct vlapic; struct vmspace; struct vm_object; +struct vm_guest_paging; struct pmap; typedef int (*vmm_init_func_t)(int ipinum); @@ -317,10 +318,41 @@ int vm_get_intinfo(struct vm *vm, int vcpuid, uint64_t *info1, uint64_t *info2); void vm_inject_gp(struct vm *vm, int vcpuid); /* general protection fault */ void vm_inject_ud(struct vm *vm, int vcpuid); /* undefined instruction fault */ +void vm_inject_ac(struct vm *vm, int vcpuid, int errcode); /* #AC */ +void vm_inject_ss(struct vm *vm, int vcpuid, int errcode); /* #SS */ void vm_inject_pf(struct vm *vm, int vcpuid, int error_code, uint64_t cr2); enum vm_reg_name vm_segment_name(int seg_encoding); +struct vm_copyinfo { + uint64_t gpa; + size_t len; + void *hva; + void *cookie; +}; + +/* + * Set up 'copyinfo[]' to copy to/from guest linear address space starting + * at 'gla' and 'len' bytes long. The 'prot' should be set to PROT_READ for + * a copyin or PROT_WRITE for a copyout. + * + * Returns 0 on success. + * Returns 1 if an exception was injected into the guest. + * Returns -1 otherwise. 
+ * + * The 'copyinfo[]' can be passed to 'vm_copyin()' or 'vm_copyout()' only if + * the return value is 0. The 'copyinfo[]' resources should be freed by calling + * 'vm_copy_teardown()' after the copy is done. + */ +int vm_copy_setup(struct vm *vm, int vcpuid, struct vm_guest_paging *paging, + uint64_t gla, size_t len, int prot, struct vm_copyinfo *copyinfo, + int num_copyinfo); +void vm_copy_teardown(struct vm *vm, int vcpuid, struct vm_copyinfo *copyinfo, + int num_copyinfo); +void vm_copyin(struct vm *vm, int vcpuid, struct vm_copyinfo *copyinfo, + void *kaddr, size_t len); +void vm_copyout(struct vm *vm, int vcpuid, const void *kaddr, + struct vm_copyinfo *copyinfo, size_t len); #endif /* KERNEL */ #define VM_MAXCPU 16 /* maximum virtual cpus */ diff --git a/sys/amd64/include/vmm_instruction_emul.h b/sys/amd64/include/vmm_instruction_emul.h index 05b60fb66ddf..bbd3d88d9cf3 100644 --- a/sys/amd64/include/vmm_instruction_emul.h +++ b/sys/amd64/include/vmm_instruction_emul.h @@ -52,8 +52,8 @@ typedef int (*mem_region_write_t)(void *vm, int cpuid, uint64_t gpa, * s */ int vmm_emulate_instruction(void *vm, int cpuid, uint64_t gpa, struct vie *vie, - mem_region_read_t mrr, mem_region_write_t mrw, - void *mrarg); + struct vm_guest_paging *paging, mem_region_read_t mrr, + mem_region_write_t mrw, void *mrarg); int vie_update_register(void *vm, int vcpuid, enum vm_reg_name reg, uint64_t val, int size); diff --git a/sys/amd64/vmm/vmm.c b/sys/amd64/vmm/vmm.c index 25042546b39e..b667b4826690 100644 --- a/sys/amd64/vmm/vmm.c +++ b/sys/amd64/vmm/vmm.c @@ -1235,8 +1235,8 @@ vm_handle_inst_emul(struct vm *vm, int vcpuid, bool *retu) return (0); } - error = vmm_emulate_instruction(vm, vcpuid, gpa, vie, mread, mwrite, - retu); + error = vmm_emulate_instruction(vm, vcpuid, gpa, vie, paging, + mread, mwrite, retu); return (error); } @@ -1751,6 +1751,30 @@ vm_inject_ud(struct vm *vm, int vcpuid) vm_inject_fault(vm, vcpuid, &udf); } +void +vm_inject_ac(struct vm *vm, int vcpuid, 
int error_code) +{ + struct vm_exception acf = { + .vector = IDT_AC, + .error_code_valid = 1, + .error_code = error_code + }; + + vm_inject_fault(vm, vcpuid, &acf); +} + +void +vm_inject_ss(struct vm *vm, int vcpuid, int error_code) +{ + struct vm_exception ssf = { + .vector = IDT_SS, + .error_code_valid = 1, + .error_code = error_code + }; + + vm_inject_fault(vm, vcpuid, &ssf); +} + static VMM_STAT(VCPU_NMI_COUNT, "number of NMIs delivered to vcpu"); int @@ -2182,6 +2206,97 @@ vm_segment_name(int seg) return (seg_names[seg]); } +void +vm_copy_teardown(struct vm *vm, int vcpuid, struct vm_copyinfo *copyinfo, + int num_copyinfo) +{ + int idx; + + for (idx = 0; idx < num_copyinfo; idx++) { + if (copyinfo[idx].cookie != NULL) + vm_gpa_release(copyinfo[idx].cookie); + } + bzero(copyinfo, num_copyinfo * sizeof(struct vm_copyinfo)); +} + +int +vm_copy_setup(struct vm *vm, int vcpuid, struct vm_guest_paging *paging, + uint64_t gla, size_t len, int prot, struct vm_copyinfo *copyinfo, + int num_copyinfo) +{ + int error, idx, nused; + size_t n, off, remaining; + void *hva, *cookie; + uint64_t gpa; + + bzero(copyinfo, sizeof(struct vm_copyinfo) * num_copyinfo); + + nused = 0; + remaining = len; + while (remaining > 0) { + KASSERT(nused < num_copyinfo, ("insufficient vm_copyinfo")); + error = vmm_gla2gpa(vm, vcpuid, paging, gla, prot, &gpa); + if (error) + return (error); + off = gpa & PAGE_MASK; + n = min(remaining, PAGE_SIZE - off); + copyinfo[nused].gpa = gpa; + copyinfo[nused].len = n; + remaining -= n; + gla += n; + nused++; + } + + for (idx = 0; idx < nused; idx++) { + hva = vm_gpa_hold(vm, copyinfo[idx].gpa, copyinfo[idx].len, + prot, &cookie); + if (hva == NULL) + break; + copyinfo[idx].hva = hva; + copyinfo[idx].cookie = cookie; + } + + if (idx != nused) { + vm_copy_teardown(vm, vcpuid, copyinfo, num_copyinfo); + return (-1); + } else { + return (0); + } +} + +void +vm_copyin(struct vm *vm, int vcpuid, struct vm_copyinfo *copyinfo, void *kaddr, + size_t len) +{ + char 
*dst; + int idx; + + dst = kaddr; + idx = 0; + while (len > 0) { + bcopy(copyinfo[idx].hva, dst, copyinfo[idx].len); + len -= copyinfo[idx].len; + dst += copyinfo[idx].len; + idx++; + } +} + +void +vm_copyout(struct vm *vm, int vcpuid, const void *kaddr, + struct vm_copyinfo *copyinfo, size_t len) +{ + const char *src; + int idx; + + src = kaddr; + idx = 0; + while (len > 0) { + bcopy(src, copyinfo[idx].hva, copyinfo[idx].len); + len -= copyinfo[idx].len; + src += copyinfo[idx].len; + idx++; + } +} /* * Return the amount of in-use and wired memory for the VM. Since diff --git a/sys/amd64/vmm/vmm_instruction_emul.c b/sys/amd64/vmm/vmm_instruction_emul.c index 7e09ccbb56c0..e8a5f7bcd4eb 100644 --- a/sys/amd64/vmm/vmm_instruction_emul.c +++ b/sys/amd64/vmm/vmm_instruction_emul.c @@ -44,6 +44,7 @@ __FBSDID("$FreeBSD$"); #else /* !_KERNEL */ #include #include +#include #include @@ -65,6 +66,8 @@ enum { VIE_OP_TYPE_AND, VIE_OP_TYPE_OR, VIE_OP_TYPE_TWO_BYTE, + VIE_OP_TYPE_PUSH, + VIE_OP_TYPE_CMP, VIE_OP_TYPE_LAST }; @@ -72,6 +75,7 @@ enum { #define VIE_OP_F_IMM (1 << 0) /* 16/32-bit immediate operand */ #define VIE_OP_F_IMM8 (1 << 1) /* 8-bit immediate operand */ #define VIE_OP_F_MOFFSET (1 << 2) /* 16/32/64-bit immediate moffset */ +#define VIE_OP_F_NO_MODRM (1 << 3) static const struct vie_op two_byte_opcodes[256] = { [0xB6] = { @@ -89,6 +93,10 @@ static const struct vie_op one_byte_opcodes[256] = { .op_byte = 0x0F, .op_type = VIE_OP_TYPE_TWO_BYTE }, + [0x3B] = { + .op_byte = 0x3B, + .op_type = VIE_OP_TYPE_CMP, + }, [0x88] = { .op_byte = 0x88, .op_type = VIE_OP_TYPE_MOV, @@ -105,6 +113,16 @@ static const struct vie_op one_byte_opcodes[256] = { .op_byte = 0x8B, .op_type = VIE_OP_TYPE_MOV, }, + [0xA1] = { + .op_byte = 0xA1, + .op_type = VIE_OP_TYPE_MOV, + .op_flags = VIE_OP_F_MOFFSET | VIE_OP_F_NO_MODRM, + }, + [0xA3] = { + .op_byte = 0xA3, + .op_type = VIE_OP_TYPE_MOV, + .op_flags = VIE_OP_F_MOFFSET | VIE_OP_F_NO_MODRM, + }, [0xC6] = { /* XXX Group 11 extended opcode - 
not just MOV */ .op_byte = 0xC6, @@ -132,6 +150,11 @@ static const struct vie_op one_byte_opcodes[256] = { .op_type = VIE_OP_TYPE_OR, .op_flags = VIE_OP_F_IMM8, }, + [0xFF] = { + /* XXX Group 5 extended opcode - not just PUSH */ + .op_byte = 0xFF, + .op_type = VIE_OP_TYPE_PUSH, + } }; /* struct vie.mod */ @@ -284,6 +307,53 @@ vie_update_register(void *vm, int vcpuid, enum vm_reg_name reg, return (error); } +/* + * Return the status flags that would result from doing (x - y). + */ +static u_long +getcc16(uint16_t x, uint16_t y) +{ + u_long rflags; + + __asm __volatile("sub %1,%2; pushfq; popq %0" : + "=r" (rflags) : "m" (y), "r" (x)); + return (rflags); +} + +static u_long +getcc32(uint32_t x, uint32_t y) +{ + u_long rflags; + + __asm __volatile("sub %1,%2; pushfq; popq %0" : + "=r" (rflags) : "m" (y), "r" (x)); + return (rflags); +} + +static u_long +getcc64(uint64_t x, uint64_t y) +{ + u_long rflags; + + __asm __volatile("sub %1,%2; pushfq; popq %0" : + "=r" (rflags) : "m" (y), "r" (x)); + return (rflags); +} + +static u_long +getcc(int opsize, uint64_t x, uint64_t y) +{ + KASSERT(opsize == 2 || opsize == 4 || opsize == 8, + ("getcc: invalid operand size %d", opsize)); + + if (opsize == 2) + return (getcc16(x, y)); + else if (opsize == 4) + return (getcc32(x, y)); + else + return (getcc64(x, y)); +} + static int emulate_mov(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, mem_region_read_t memread, mem_region_write_t memwrite, void *arg) @@ -346,6 +416,32 @@ emulate_mov(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, error = vie_update_register(vm, vcpuid, reg, val, size); } break; + case 0xA1: + /* + * MOV from seg:moffset to AX/EAX/RAX + * A1: mov AX, moffs16 + * A1: mov EAX, moffs32 + * REX.W + A1: mov RAX, moffs64 + */ + error = memread(vm, vcpuid, gpa, &val, size, arg); + if (error == 0) { + reg = VM_REG_GUEST_RAX; + error = vie_update_register(vm, vcpuid, reg, val, size); + } + break; + case 0xA3: + /* + * MOV from AX/EAX/RAX to seg:moffset + * A3: 
mov moffs16, AX + * A3: mov moffs32, EAX + * REX.W + A3: mov moffs64, RAX + */ + error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RAX, &val); + if (error == 0) { + val &= size2mask[size]; + error = memwrite(vm, vcpuid, gpa, val, size, arg); + } + break; case 0xC6: /* * MOV from imm8 to mem (ModRM:r/m) @@ -553,10 +649,150 @@ emulate_or(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, return (error); } +#define RFLAGS_STATUS_BITS (PSL_C | PSL_PF | PSL_AF | PSL_Z | PSL_N | PSL_V) + +static int +emulate_cmp(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, + mem_region_read_t memread, mem_region_write_t memwrite, void *arg) +{ + int error, size; + uint64_t op1, op2, rflags, rflags2; + enum vm_reg_name reg; + + size = vie->opsize; + switch (vie->op.op_byte) { + case 0x3B: + /* + * 3B/r CMP r16, r/m16 + * 3B/r CMP r32, r/m32 + * REX.W + 3B/r CMP r64, r/m64 + * + * Compare first operand (reg) with second operand (r/m) and + * set status flags in EFLAGS register. The comparison is + * performed by subtracting the second operand from the first + * operand and then setting the status flags. 
+ */ + /* Get the first operand */ + reg = gpr_map[vie->reg]; + error = vie_read_register(vm, vcpuid, reg, &op1); + if (error) + return (error); + + /* Get the second operand */ + error = memread(vm, vcpuid, gpa, &op2, size, arg); + if (error) + return (error); + + break; + default: + return (EINVAL); + } + rflags2 = getcc(size, op1, op2); + error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, &rflags); + if (error) + return (error); + rflags &= ~RFLAGS_STATUS_BITS; + rflags |= rflags2 & RFLAGS_STATUS_BITS; + + error = vie_update_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, rflags, 8); + return (error); +} + +static int +emulate_push(void *vm, int vcpuid, uint64_t mmio_gpa, struct vie *vie, + struct vm_guest_paging *paging, mem_region_read_t memread, + mem_region_write_t memwrite, void *arg) +{ +#ifdef _KERNEL + struct vm_copyinfo copyinfo[2]; +#else + struct iovec copyinfo[2]; +#endif + struct seg_desc ss_desc; + uint64_t cr0, rflags, rsp, stack_gla, val; + int error, size, stackaddrsize; + + /* + * Table A-6, "Opcode Extensions", Intel SDM, Vol 2. + * + * PUSH is part of the group 5 extended opcodes and is identified + * by ModRM:reg = b110. + */ + if ((vie->reg & 7) != 6) + return (EINVAL); + + size = vie->opsize; + /* + * From "Address-Size Attributes for Stack Accesses", Intel SDM, Vol 1 + */ + if (paging->cpu_mode == CPU_MODE_REAL) + stackaddrsize = 2; + else if (paging->cpu_mode == CPU_MODE_64BIT) + stackaddrsize = 8; + else { + /* + * In protected or compatibility mode the 'B' flag in the + * stack-segment descriptor determines the size of the + * stack pointer.
+ */ + error = vm_get_seg_desc(vm, vcpuid, VM_REG_GUEST_SS, &ss_desc); + KASSERT(error == 0, ("%s: error %d getting SS descriptor", + __func__, error)); + if (SEG_DESC_DEF32(ss_desc.access)) + stackaddrsize = 4; + else + stackaddrsize = 2; + } + + error = vie_read_register(vm, vcpuid, VM_REG_GUEST_CR0, &cr0); + KASSERT(error == 0, ("%s: error %d getting cr0", __func__, error)); + + error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, &rflags); + KASSERT(error == 0, ("%s: error %d getting rflags", __func__, error)); + + error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RSP, &rsp); + KASSERT(error == 0, ("%s: error %d getting rsp", __func__, error)); + + rsp -= size; + if (vie_calculate_gla(paging->cpu_mode, VM_REG_GUEST_SS, &ss_desc, + rsp, size, stackaddrsize, PROT_WRITE, &stack_gla)) { + vm_inject_ss(vm, vcpuid, 0); + return (0); + } + + if (vie_canonical_check(paging->cpu_mode, stack_gla)) { + vm_inject_ss(vm, vcpuid, 0); + return (0); + } + + if (vie_alignment_check(paging->cpl, size, cr0, rflags, stack_gla)) { + vm_inject_ac(vm, vcpuid, 0); + return (0); + } + + error = vm_copy_setup(vm, vcpuid, paging, stack_gla, size, PROT_WRITE, + copyinfo, nitems(copyinfo)); + if (error) + return (error); + + error = memread(vm, vcpuid, mmio_gpa, &val, size, arg); + if (error == 0) { + vm_copyout(vm, vcpuid, &val, copyinfo, size); + error = vie_update_register(vm, vcpuid, VM_REG_GUEST_RSP, rsp, + stackaddrsize); + KASSERT(error == 0, ("error %d updating rsp", error)); + } +#ifdef _KERNEL + vm_copy_teardown(vm, vcpuid, copyinfo, nitems(copyinfo)); +#endif + return (error); +} + int vmm_emulate_instruction(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, - mem_region_read_t memread, mem_region_write_t memwrite, - void *memarg) + struct vm_guest_paging *paging, mem_region_read_t memread, + mem_region_write_t memwrite, void *memarg) { int error; @@ -564,6 +800,14 @@ vmm_emulate_instruction(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, return (EINVAL); switch 
(vie->op.op_type) { + case VIE_OP_TYPE_PUSH: + error = emulate_push(vm, vcpuid, gpa, vie, paging, memread, + memwrite, memarg); + break; + case VIE_OP_TYPE_CMP: + error = emulate_cmp(vm, vcpuid, gpa, vie, + memread, memwrite, memarg); + break; case VIE_OP_TYPE_MOV: error = emulate_mov(vm, vcpuid, gpa, vie, memread, memwrite, memarg); @@ -970,45 +1214,24 @@ fault: } int -vmm_fetch_instruction(struct vm *vm, int cpuid, struct vm_guest_paging *paging, +vmm_fetch_instruction(struct vm *vm, int vcpuid, struct vm_guest_paging *paging, uint64_t rip, int inst_length, struct vie *vie) { - int n, error, prot; - uint64_t gpa, off; - void *hpa, *cookie; + struct vm_copyinfo copyinfo[2]; + int error, prot; - /* - * XXX cache previously fetched instructions using 'rip' as the tag - */ - - prot = VM_PROT_READ | VM_PROT_EXECUTE; if (inst_length > VIE_INST_SIZE) panic("vmm_fetch_instruction: invalid length %d", inst_length); - /* Copy the instruction into 'vie' */ - while (vie->num_valid < inst_length) { - error = vmm_gla2gpa(vm, cpuid, paging, rip, prot, &gpa); - if (error) - return (error); - - off = gpa & PAGE_MASK; - n = min(inst_length - vie->num_valid, PAGE_SIZE - off); - - if ((hpa = vm_gpa_hold(vm, gpa, n, prot, &cookie)) == NULL) - break; - - bcopy(hpa, &vie->inst[vie->num_valid], n); - - vm_gpa_release(cookie); - - rip += n; - vie->num_valid += n; + prot = PROT_READ | PROT_EXEC; + error = vm_copy_setup(vm, vcpuid, paging, rip, inst_length, prot, + copyinfo, nitems(copyinfo)); + if (error == 0) { + vm_copyin(vm, vcpuid, copyinfo, vie->inst, inst_length); + vm_copy_teardown(vm, vcpuid, copyinfo, nitems(copyinfo)); + vie->num_valid = inst_length; } - - if (vie->num_valid == inst_length) - return (0); - else - return (-1); + return (error); } static int @@ -1138,6 +1361,9 @@ decode_modrm(struct vie *vie, enum vm_cpu_mode cpu_mode) if (cpu_mode == CPU_MODE_REAL) return (-1); + if (vie->op.op_flags & VIE_OP_F_NO_MODRM) + return (0); + if (vie_peek(vie, &x)) return (-1); @@ 
-1314,24 +1540,14 @@ decode_immediate(struct vie *vie) int i, n; uint8_t x; union { - char buf[8]; + char buf[4]; int8_t signed8; int16_t signed16; int32_t signed32; - int64_t signed64; } u; /* Figure out immediate operand size (if any) */ - if (vie->op.op_flags & VIE_OP_F_MOFFSET) { - /* - * Section 2.2.1.4, "Direct Memory-Offset MOVs", Intel SDM: - * The memory offset size follows the address-size of the - * instruction. Although this is treated as an immediate - * value during instruction decoding it is interpreted as - * a segment offset by the instruction emulation. - */ - vie->imm_bytes = vie->addrsize; - } else if (vie->op.op_flags & VIE_OP_F_IMM) { + if (vie->op.op_flags & VIE_OP_F_IMM) { /* * Section 2.2.1.5 "Immediates", Intel SDM: * In 64-bit mode the typical size of immediate operands @@ -1350,7 +1566,7 @@ decode_immediate(struct vie *vie) if ((n = vie->imm_bytes) == 0) return (0); - KASSERT(n == 1 || n == 2 || n == 4 || n == 8, + KASSERT(n == 1 || n == 2 || n == 4, ("%s: invalid number of immediate bytes: %d", __func__, n)); for (i = 0; i < n; i++) { @@ -1366,20 +1582,41 @@ decode_immediate(struct vie *vie) vie->immediate = u.signed8; else if (n == 2) vie->immediate = u.signed16; - else if (n == 4) - vie->immediate = u.signed32; else - vie->immediate = u.signed64; + vie->immediate = u.signed32; + return (0); +} - if (vie->op.op_flags & VIE_OP_F_MOFFSET) { - /* - * If the immediate value is going to be interpreted as a - * segment offset then undo the sign-extension above. - */ - vie->immediate &= size2mask[n]; - } +static int +decode_moffset(struct vie *vie) +{ + int i, n; + uint8_t x; + union { + char buf[8]; + uint64_t u64; + } u; + + if ((vie->op.op_flags & VIE_OP_F_MOFFSET) == 0) + return (0); + /* + * Section 2.2.1.4, "Direct Memory-Offset MOVs", Intel SDM: + * The memory offset size follows the address-size of the instruction. 
+ */ + n = vie->addrsize; + KASSERT(n == 2 || n == 4 || n == 8, ("invalid moffset bytes: %d", n)); + + u.u64 = 0; + for (i = 0; i < n; i++) { + if (vie_peek(vie, &x)) + return (-1); + + u.buf[i] = x; + vie_advance(vie); + } + vie->displacement = u.u64; return (0); } @@ -1470,10 +1707,13 @@ vmm_decode_instruction(struct vm *vm, int cpuid, uint64_t gla, if (decode_displacement(vie)) return (-1); - + if (decode_immediate(vie)) return (-1); + if (decode_moffset(vie)) + return (-1); + if (verify_inst_length(vie)) return (-1); diff --git a/usr.sbin/bhyve/bhyverun.c b/usr.sbin/bhyve/bhyverun.c index 2b95d9cf880a..26c6e5378192 100644 --- a/usr.sbin/bhyve/bhyverun.c +++ b/usr.sbin/bhyve/bhyverun.c @@ -347,8 +347,7 @@ vmexit_rdmsr(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu) fprintf(stderr, "rdmsr to register %#x on vcpu %d\n", vme->u.msr.code, *pvcpu); if (strictmsr) { - error = vm_inject_exception2(ctx, *pvcpu, IDT_GP, 0); - assert(error == 0); + vm_inject_gp(ctx, *pvcpu, 0); return (VMEXIT_RESTART); } } @@ -374,8 +373,7 @@ vmexit_wrmsr(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu) fprintf(stderr, "wrmsr to register %#x(%#lx) on vcpu %d\n", vme->u.msr.code, vme->u.msr.wval, *pvcpu); if (strictmsr) { - error = vm_inject_exception2(ctx, *pvcpu, IDT_GP, 0); - assert(error == 0); + vm_inject_gp(ctx, *pvcpu, 0); return (VMEXIT_RESTART); } } @@ -484,7 +482,7 @@ vmexit_inst_emul(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) stats.vmexit_inst_emul++; err = emulate_mem(ctx, *pvcpu, vmexit->u.inst_emul.gpa, - &vmexit->u.inst_emul.vie); + &vmexit->u.inst_emul.vie, &vmexit->u.inst_emul.paging); if (err) { if (err == EINVAL) { diff --git a/usr.sbin/bhyve/inout.c b/usr.sbin/bhyve/inout.c index fe9e0d85625b..145ac1cbcba8 100644 --- a/usr.sbin/bhyve/inout.c +++ b/usr.sbin/bhyve/inout.c @@ -157,15 +157,13 @@ emulate_inout(struct vmctx *ctx, int vcpu, struct vm_exit *vmexit, int strict) if (vie_calculate_gla(vis->paging.cpu_mode, vis->seg_name, &vis->seg_desc, index, 
bytes, addrsize, prot, &gla)) { - error = vm_inject_exception2(ctx, vcpu, - IDT_GP, 0); - assert(error == 0); + vm_inject_gp(ctx, vcpu, 0); retval = INOUT_RESTART; break; } - error = vm_gla2gpa(ctx, vcpu, &vis->paging, gla, bytes, - prot, iov, nitems(iov)); + error = vm_copy_setup(ctx, vcpu, &vis->paging, gla, + bytes, prot, iov, nitems(iov)); assert(error == 0 || error == 1 || error == -1); if (error) { retval = (error == 1) ? INOUT_RESTART : @@ -175,9 +173,7 @@ emulate_inout(struct vmctx *ctx, int vcpu, struct vm_exit *vmexit, int strict) if (vie_alignment_check(vis->paging.cpl, bytes, vis->cr0, vis->rflags, gla)) { - error = vm_inject_exception2(ctx, vcpu, - IDT_AC, 0); - assert(error == 0); + vm_inject_ac(ctx, vcpu, 0); return (INOUT_RESTART); } diff --git a/usr.sbin/bhyve/mem.c b/usr.sbin/bhyve/mem.c index 7ea630f2a587..37cf055f2ccf 100644 --- a/usr.sbin/bhyve/mem.c +++ b/usr.sbin/bhyve/mem.c @@ -157,7 +157,9 @@ mem_write(void *ctx, int vcpu, uint64_t gpa, uint64_t wval, int size, void *arg) } int -emulate_mem(struct vmctx *ctx, int vcpu, uint64_t paddr, struct vie *vie) +emulate_mem(struct vmctx *ctx, int vcpu, uint64_t paddr, struct vie *vie, + struct vm_guest_paging *paging) + { struct mmio_rb_range *entry; int err; @@ -184,10 +186,10 @@ emulate_mem(struct vmctx *ctx, int vcpu, uint64_t paddr, struct vie *vie) } assert(entry != NULL); - err = vmm_emulate_instruction(ctx, vcpu, paddr, vie, + err = vmm_emulate_instruction(ctx, vcpu, paddr, vie, paging, mem_read, mem_write, &entry->mr_param); pthread_rwlock_unlock(&mmio_rwlock); - + return (err); } diff --git a/usr.sbin/bhyve/mem.h b/usr.sbin/bhyve/mem.h index 264bff9e82b0..eb648c145df6 100644 --- a/usr.sbin/bhyve/mem.h +++ b/usr.sbin/bhyve/mem.h @@ -50,7 +50,8 @@ struct mem_range { #define MEM_F_RW 0x3 void init_mem(void); -int emulate_mem(struct vmctx *, int vcpu, uint64_t paddr, struct vie *vie); +int emulate_mem(struct vmctx *, int vcpu, uint64_t paddr, struct vie *vie, + struct vm_guest_paging *paging); 
int register_mem(struct mem_range *memp); int register_mem_fallback(struct mem_range *memp); diff --git a/usr.sbin/bhyve/task_switch.c b/usr.sbin/bhyve/task_switch.c index e946807aa8e5..64339827ea25 100644 --- a/usr.sbin/bhyve/task_switch.c +++ b/usr.sbin/bhyve/task_switch.c @@ -214,7 +214,7 @@ desc_table_rw(struct vmctx *ctx, int vcpu, struct vm_guest_paging *paging, assert(error == 0); assert(limit >= SEL_LIMIT(sel)); - error = vm_gla2gpa(ctx, vcpu, paging, base + SEL_START(sel), + error = vm_copy_setup(ctx, vcpu, paging, base + SEL_START(sel), sizeof(*desc), doread ? PROT_READ : PROT_WRITE, iov, nitems(iov)); if (error == 0) { if (doread) @@ -508,9 +508,7 @@ tss32_restore(struct vmctx *ctx, int vcpu, struct vm_task_switch *ts, */ reserved = ~maxphyaddr | 0x1E6; if (pdpte[i] & reserved) { - error = vm_inject_exception2(ctx, vcpu, - IDT_GP, 0); - assert(error == 0); + vm_inject_gp(ctx, vcpu, 0); return (VMEXIT_RESTART); } } @@ -649,12 +647,11 @@ push_errcode(struct vmctx *ctx, int vcpu, struct vm_guest_paging *paging, } if (vie_alignment_check(paging->cpl, bytes, cr0, rflags, gla)) { - error = vm_inject_exception2(ctx, vcpu, IDT_AC, 1); - assert(error == 0); + vm_inject_ac(ctx, vcpu, 1); return (VMEXIT_RESTART); } - error = vm_gla2gpa(ctx, vcpu, paging, gla, bytes, PROT_WRITE, + error = vm_copy_setup(ctx, vcpu, paging, gla, bytes, PROT_WRITE, iov, nitems(iov)); assert(error == 0 || error == 1 || error == -1); if (error) { @@ -753,7 +750,7 @@ vmexit_task_switch(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) } /* Fetch the new TSS */ - error = vm_gla2gpa(ctx, vcpu, &sup_paging, nt.base, minlimit + 1, + error = vm_copy_setup(ctx, vcpu, &sup_paging, nt.base, minlimit + 1, PROT_READ | PROT_WRITE, nt_iov, nitems(nt_iov)); if (error == 1) { /* Restart vcpu execution to handle the page fault */ @@ -793,7 +790,7 @@ vmexit_task_switch(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) return (error); /* Get the old TSS */ - error = vm_gla2gpa(ctx, vcpu, 
&sup_paging, ot_base, minlimit + 1, + error = vm_copy_setup(ctx, vcpu, &sup_paging, ot_base, minlimit + 1, PROT_READ | PROT_WRITE, ot_iov, nitems(ot_iov)); if (error == 1) { /* Restart vcpu execution to handle the page fault */ -- cgit v1.3 From d37f2adb383c75848d30b1eb5204a1d1d6190373 Mon Sep 17 00:00:00 2001 From: Neel Natu Date: Thu, 24 Jul 2014 01:38:11 +0000 Subject: Fix fault injection in bhyve. The faulting instruction needs to be restarted when the exception handler is done handling the fault. bhyve now does this correctly by setting 'vmexit[vcpu].inst_length' to zero so the %rip is not advanced. A minor complication is that the fault injection APIs are used by instruction emulation code that is shared by vmm.ko and bhyve. Thus the argument that refers to 'struct vm *' in kernel or 'struct vmctx *' in userspace needs to be loosely typed as a 'void *'. --- lib/libvmmapi/vmmapi.c | 27 ----------------- lib/libvmmapi/vmmapi.h | 5 --- sys/amd64/include/vmm.h | 38 +++++++++++++++++++---- sys/amd64/vmm/vmm.c | 72 +++++++++----------------------------------- usr.sbin/bhyve/bhyverun.c | 27 +++++++++++++++-- usr.sbin/bhyve/inout.c | 2 +- usr.sbin/bhyve/task_switch.c | 7 ++--- 7 files changed, 74 insertions(+), 104 deletions(-) (limited to 'lib/libvmmapi') diff --git a/lib/libvmmapi/vmmapi.c b/lib/libvmmapi/vmmapi.c index 087d0b789f11..93955c7c233e 100644 --- a/lib/libvmmapi/vmmapi.c +++ b/lib/libvmmapi/vmmapi.c @@ -1146,30 +1146,3 @@ vm_set_intinfo(struct vmctx *ctx, int vcpu, uint64_t info1) error = ioctl(ctx->fd, VM_SET_INTINFO, &vmii); return (error); } - -void -vm_inject_ss(struct vmctx *ctx, int vcpu, int errcode) -{ - int error; - - error = vm_inject_exception2(ctx, vcpu, IDT_SS, errcode); - assert(error == 0); -} - -void -vm_inject_ac(struct vmctx *ctx, int vcpu, int errcode) -{ - int error; - - error = vm_inject_exception2(ctx, vcpu, IDT_AC, errcode); - assert(error == 0); -} - -void -vm_inject_gp(struct vmctx *ctx, int vcpu, int errcode) -{ - int error; - - 
error = vm_inject_exception2(ctx, vcpu, IDT_GP, errcode); - assert(error == 0); -} diff --git a/lib/libvmmapi/vmmapi.h b/lib/libvmmapi/vmmapi.h index 72d75c002676..fbb6ddd3acfb 100644 --- a/lib/libvmmapi/vmmapi.h +++ b/lib/libvmmapi/vmmapi.h @@ -133,11 +133,6 @@ void vm_copyin(struct vmctx *ctx, int vcpu, struct iovec *guest_iov, void vm_copyout(struct vmctx *ctx, int vcpu, const void *host_src, struct iovec *guest_iov, size_t len); -/* Helper functions to inject exceptions */ -void vm_inject_ss(struct vmctx *ctx, int vcpu, int errcode); -void vm_inject_ac(struct vmctx *ctx, int vcpu, int errcode); -void vm_inject_gp(struct vmctx *ctx, int vcpu, int errcode); - /* Reset vcpu register state */ int vcpu_reset(struct vmctx *ctx, int vcpu); diff --git a/sys/amd64/include/vmm.h b/sys/amd64/include/vmm.h index 6f476b4afa32..62af24093b28 100644 --- a/sys/amd64/include/vmm.h +++ b/sys/amd64/include/vmm.h @@ -29,6 +29,8 @@ #ifndef _VMM_H_ #define _VMM_H_ +#include + enum vm_suspend_how { VM_SUSPEND_NONE, VM_SUSPEND_RESET, @@ -316,12 +318,6 @@ int vm_entry_intinfo(struct vm *vm, int vcpuid, uint64_t *info); int vm_get_intinfo(struct vm *vm, int vcpuid, uint64_t *info1, uint64_t *info2); -void vm_inject_gp(struct vm *vm, int vcpuid); /* general protection fault */ -void vm_inject_ud(struct vm *vm, int vcpuid); /* undefined instruction fault */ -void vm_inject_ac(struct vm *vm, int vcpuid, int errcode); /* #AC */ -void vm_inject_ss(struct vm *vm, int vcpuid, int errcode); /* #SS */ -void vm_inject_pf(struct vm *vm, int vcpuid, int error_code, uint64_t cr2); - enum vm_reg_name vm_segment_name(int seg_encoding); struct vm_copyinfo { @@ -579,4 +575,34 @@ struct vm_exit { } u; }; +/* APIs to inject faults into the guest */ +void vm_inject_fault(void *vm, int vcpuid, int vector, int errcode_valid, + int errcode); + +static void __inline +vm_inject_ud(void *vm, int vcpuid) +{ + vm_inject_fault(vm, vcpuid, IDT_UD, 0, 0); +} + +static void __inline +vm_inject_gp(void *vm, int vcpuid) 
+{ + vm_inject_fault(vm, vcpuid, IDT_GP, 1, 0); +} + +static void __inline +vm_inject_ac(void *vm, int vcpuid, int errcode) +{ + vm_inject_fault(vm, vcpuid, IDT_AC, 1, errcode); +} + +static void __inline +vm_inject_ss(void *vm, int vcpuid, int errcode) +{ + vm_inject_fault(vm, vcpuid, IDT_SS, 1, errcode); +} + +void vm_inject_pf(void *vm, int vcpuid, int error_code, uint64_t cr2); + #endif /* _VMM_H_ */ diff --git a/sys/amd64/vmm/vmm.c b/sys/amd64/vmm/vmm.c index b667b4826690..78aefc4b3443 100644 --- a/sys/amd64/vmm/vmm.c +++ b/sys/amd64/vmm/vmm.c @@ -1689,13 +1689,21 @@ vm_inject_exception(struct vm *vm, int vcpuid, struct vm_exception *exception) return (0); } -static void -vm_inject_fault(struct vm *vm, int vcpuid, struct vm_exception *exception) +void +vm_inject_fault(void *vmarg, int vcpuid, int vector, int errcode_valid, + int errcode) { + struct vm_exception exception; struct vm_exit *vmexit; + struct vm *vm; int error; - error = vm_inject_exception(vm, vcpuid, exception); + vm = vmarg; + + exception.vector = vector; + exception.error_code = errcode; + exception.error_code_valid = errcode_valid; + error = vm_inject_exception(vm, vcpuid, &exception); KASSERT(error == 0, ("vm_inject_exception error %d", error)); /* @@ -1710,69 +1718,19 @@ vm_inject_fault(struct vm *vm, int vcpuid, struct vm_exception *exception) } void -vm_inject_pf(struct vm *vm, int vcpuid, int error_code, uint64_t cr2) +vm_inject_pf(void *vmarg, int vcpuid, int error_code, uint64_t cr2) { - struct vm_exception pf = { - .vector = IDT_PF, - .error_code_valid = 1, - .error_code = error_code - }; + struct vm *vm; int error; + vm = vmarg; VCPU_CTR2(vm, vcpuid, "Injecting page fault: error_code %#x, cr2 %#lx", error_code, cr2); error = vm_set_register(vm, vcpuid, VM_REG_GUEST_CR2, cr2); KASSERT(error == 0, ("vm_set_register(cr2) error %d", error)); - vm_inject_fault(vm, vcpuid, &pf); -} - -void -vm_inject_gp(struct vm *vm, int vcpuid) -{ - struct vm_exception gpf = { - .vector = IDT_GP, - 
.error_code_valid = 1, - .error_code = 0 - }; - - vm_inject_fault(vm, vcpuid, &gpf); -} - -void -vm_inject_ud(struct vm *vm, int vcpuid) -{ - struct vm_exception udf = { - .vector = IDT_UD, - .error_code_valid = 0 - }; - - vm_inject_fault(vm, vcpuid, &udf); -} - -void -vm_inject_ac(struct vm *vm, int vcpuid, int error_code) -{ - struct vm_exception acf = { - .vector = IDT_AC, - .error_code_valid = 1, - .error_code = error_code - }; - - vm_inject_fault(vm, vcpuid, &acf); -} - -void -vm_inject_ss(struct vm *vm, int vcpuid, int error_code) -{ - struct vm_exception ssf = { - .vector = IDT_SS, - .error_code_valid = 1, - .error_code = error_code - }; - - vm_inject_fault(vm, vcpuid, &ssf); + vm_inject_fault(vm, vcpuid, IDT_PF, 1, error_code); } static VMM_STAT(VCPU_NMI_COUNT, "number of NMIs delivered to vcpu"); diff --git a/usr.sbin/bhyve/bhyverun.c b/usr.sbin/bhyve/bhyverun.c index 26c6e5378192..e3d5994dce32 100644 --- a/usr.sbin/bhyve/bhyverun.c +++ b/usr.sbin/bhyve/bhyverun.c @@ -96,7 +96,7 @@ static cpuset_t cpumask; static void vm_loop(struct vmctx *ctx, int vcpu, uint64_t rip); -struct vm_exit vmexit[VM_MAXCPU]; +static struct vm_exit vmexit[VM_MAXCPU]; struct bhyvestats { uint64_t vmexit_bogus; @@ -182,6 +182,27 @@ pincpu_parse(const char *opt) return (0); } +void +vm_inject_fault(void *arg, int vcpu, int vector, int errcode_valid, + int errcode) +{ + struct vmctx *ctx; + int error; + + ctx = arg; + if (errcode_valid) + error = vm_inject_exception2(ctx, vcpu, vector, errcode); + else + error = vm_inject_exception(ctx, vcpu, vector); + assert(error == 0); + + /* + * Set the instruction length to 0 to ensure that the instruction is + * restarted when the fault handler returns. 
+ */ + vmexit[vcpu].inst_length = 0; +} + void * paddr_guest2host(struct vmctx *ctx, uintptr_t gaddr, size_t len) { @@ -347,7 +368,7 @@ vmexit_rdmsr(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu) fprintf(stderr, "rdmsr to register %#x on vcpu %d\n", vme->u.msr.code, *pvcpu); if (strictmsr) { - vm_inject_gp(ctx, *pvcpu, 0); + vm_inject_gp(ctx, *pvcpu); return (VMEXIT_RESTART); } } @@ -373,7 +394,7 @@ vmexit_wrmsr(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu) fprintf(stderr, "wrmsr to register %#x(%#lx) on vcpu %d\n", vme->u.msr.code, vme->u.msr.wval, *pvcpu); if (strictmsr) { - vm_inject_gp(ctx, *pvcpu, 0); + vm_inject_gp(ctx, *pvcpu); return (VMEXIT_RESTART); } } diff --git a/usr.sbin/bhyve/inout.c b/usr.sbin/bhyve/inout.c index 145ac1cbcba8..447f6c55fde7 100644 --- a/usr.sbin/bhyve/inout.c +++ b/usr.sbin/bhyve/inout.c @@ -157,7 +157,7 @@ emulate_inout(struct vmctx *ctx, int vcpu, struct vm_exit *vmexit, int strict) if (vie_calculate_gla(vis->paging.cpu_mode, vis->seg_name, &vis->seg_desc, index, bytes, addrsize, prot, &gla)) { - vm_inject_gp(ctx, vcpu, 0); + vm_inject_gp(ctx, vcpu); retval = INOUT_RESTART; break; } diff --git a/usr.sbin/bhyve/task_switch.c b/usr.sbin/bhyve/task_switch.c index 64339827ea25..543c01f4536e 100644 --- a/usr.sbin/bhyve/task_switch.c +++ b/usr.sbin/bhyve/task_switch.c @@ -160,8 +160,6 @@ usd_to_seg_desc(struct user_segment_descriptor *usd) static void sel_exception(struct vmctx *ctx, int vcpu, int vector, uint16_t sel, int ext) { - int error; - /* * Bit 2 from the selector is retained as-is in the error code. 
* @@ -174,8 +172,7 @@ sel_exception(struct vmctx *ctx, int vcpu, int vector, uint16_t sel, int ext) sel &= ~0x3; if (ext) sel |= 0x1; - error = vm_inject_exception2(ctx, vcpu, vector, sel); - assert(error == 0); + vm_inject_fault(ctx, vcpu, vector, 1, sel); } static int @@ -508,7 +505,7 @@ tss32_restore(struct vmctx *ctx, int vcpu, struct vm_task_switch *ts, */ reserved = ~maxphyaddr | 0x1E6; if (pdpte[i] & reserved) { - vm_inject_gp(ctx, vcpu, 0); + vm_inject_gp(ctx, vcpu); return (VMEXIT_RESTART); } } -- cgit v1.3