Diffstat (limited to 'sys/dev/xen/privcmd/privcmd.c')
-rw-r--r--	sys/dev/xen/privcmd/privcmd.c	414
1 file changed, 414 insertions, 0 deletions
diff --git a/sys/dev/xen/privcmd/privcmd.c b/sys/dev/xen/privcmd/privcmd.c
new file mode 100644
index 000000000000..761fb037b163
--- /dev/null
+++ b/sys/dev/xen/privcmd/privcmd.c
@@ -0,0 +1,414 @@
+/*
+ * Copyright (c) 2014 Roger Pau Monné <roger.pau@citrix.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/uio.h>
+#include <sys/bus.h>
+#include <sys/malloc.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
+#include <sys/rwlock.h>
+#include <sys/selinfo.h>
+#include <sys/poll.h>
+#include <sys/conf.h>
+#include <sys/fcntl.h>
+#include <sys/ioccom.h>
+#include <sys/rman.h>
+#include <sys/tree.h>
+#include <sys/module.h>
+#include <sys/proc.h>
+
+#include <vm/vm.h>
+#include <vm/vm_param.h>
+#include <vm/vm_extern.h>
+#include <vm/vm_kern.h>
+#include <vm/vm_page.h>
+#include <vm/vm_map.h>
+#include <vm/vm_object.h>
+#include <vm/vm_pager.h>
+#include <vm/vm_phys.h>
+
+#include <machine/md_var.h>
+
+#include <xen/xen-os.h>
+#include <xen/hypervisor.h>
+#include <xen/privcmd.h>
+#include <xen/error.h>
+
+MALLOC_DEFINE(M_PRIVCMD, "privcmd_dev", "Xen privcmd user-space device");
+
+struct privcmd_map {
+	vm_object_t mem;
+	vm_size_t size;
+	struct resource *pseudo_phys_res;
+	int pseudo_phys_res_id;
+	vm_paddr_t phys_base_addr;
+	boolean_t mapped;
+	int *errs;
+};
+
+static d_ioctl_t privcmd_ioctl;
+static d_mmap_single_t privcmd_mmap_single;
+
+static struct cdevsw privcmd_devsw = {
+	.d_version = D_VERSION,
+	.d_ioctl = privcmd_ioctl,
+	.d_mmap_single = privcmd_mmap_single,
+	.d_name = "privcmd",
+};
+
+static int privcmd_pg_ctor(void *handle, vm_ooffset_t size, vm_prot_t prot,
+    vm_ooffset_t foff, struct ucred *cred, u_short *color);
+static void privcmd_pg_dtor(void *handle);
+static int privcmd_pg_fault(vm_object_t object, vm_ooffset_t offset,
+    int prot, vm_page_t *mres);
+
+static struct cdev_pager_ops privcmd_pg_ops = {
+	.cdev_pg_fault = privcmd_pg_fault,
+	.cdev_pg_ctor = privcmd_pg_ctor,
+	.cdev_pg_dtor = privcmd_pg_dtor,
+};
+
+static device_t privcmd_dev = NULL;
+
+/*------------------------- Privcmd Pager functions --------------------------*/
+static int
+privcmd_pg_ctor(void *handle, vm_ooffset_t size, vm_prot_t prot,
+    vm_ooffset_t foff, struct ucred *cred, u_short *color)
+{
+
+	return (0);
+}
+
+static void
+privcmd_pg_dtor(void *handle)
+{
+	struct xen_remove_from_physmap rm = { .domid = DOMID_SELF };
+	struct privcmd_map *map = handle;
+	int error;
+	vm_size_t i;
+	vm_page_t m;
+
+	/*
+	 * Remove the mappings from the used pages. This will remove the
+	 * underlying p2m bindings in Xen second stage translation.
+	 */
+	if (map->mapped == true) {
+		VM_OBJECT_WLOCK(map->mem);
+retry:
+		for (i = 0; i < map->size; i++) {
+			m = vm_page_lookup(map->mem, i);
+			if (m == NULL)
+				continue;
+			if (vm_page_sleep_if_busy(m, "pcmdum"))
+				goto retry;
+			cdev_pager_free_page(map->mem, m);
+		}
+		VM_OBJECT_WUNLOCK(map->mem);
+
+		for (i = 0; i < map->size; i++) {
+			rm.gpfn = atop(map->phys_base_addr) + i;
+			HYPERVISOR_memory_op(XENMEM_remove_from_physmap, &rm);
+		}
+		free(map->errs, M_PRIVCMD);
+	}
+
+	vm_phys_fictitious_unreg_range(map->phys_base_addr,
+	    map->phys_base_addr + map->size * PAGE_SIZE);
+
+	error = bus_release_resource(privcmd_dev, SYS_RES_MEMORY,
+	    map->pseudo_phys_res_id, map->pseudo_phys_res);
+	KASSERT(error == 0, ("Unable to release memory resource: %d", error));
+
+	free(map, M_PRIVCMD);
+}
+
+static int
+privcmd_pg_fault(vm_object_t object, vm_ooffset_t offset,
+    int prot, vm_page_t *mres)
+{
+	struct privcmd_map *map = object->handle;
+	vm_pindex_t pidx;
+	vm_page_t page, oldm;
+
+	if (map->mapped != true)
+		return (VM_PAGER_FAIL);
+
+	pidx = OFF_TO_IDX(offset);
+	if (pidx >= map->size || map->errs[pidx] != 0)
+		return (VM_PAGER_FAIL);
+
+	page = PHYS_TO_VM_PAGE(map->phys_base_addr + offset);
+	if (page == NULL)
+		return (VM_PAGER_FAIL);
+
+	KASSERT((page->flags & PG_FICTITIOUS) != 0,
+	    ("not fictitious %p", page));
+	KASSERT(page->wire_count == 1, ("wire_count not 1 %p", page));
+	KASSERT(vm_page_busied(page) == 0, ("page %p is busy", page));
+
+	if (*mres != NULL) {
+		oldm = *mres;
+		vm_page_lock(oldm);
+		vm_page_free(oldm);
+		vm_page_unlock(oldm);
+		*mres = NULL;
+	}
+
+	vm_page_insert(page, object, pidx);
+	page->valid = VM_PAGE_BITS_ALL;
+	vm_page_xbusy(page);
+	*mres = page;
+	return (VM_PAGER_OK);
+}
+
+/*----------------------- Privcmd char device methods ------------------------*/
+static int
+privcmd_mmap_single(struct cdev *cdev, vm_ooffset_t *offset, vm_size_t size,
+    vm_object_t *object, int nprot)
+{
+	struct privcmd_map *map;
+	int error;
+
+	map = malloc(sizeof(*map), M_PRIVCMD, M_WAITOK | M_ZERO);
+
+	map->size = OFF_TO_IDX(size);
+	map->pseudo_phys_res_id = 0;
+
+	map->pseudo_phys_res = bus_alloc_resource(privcmd_dev, SYS_RES_MEMORY,
+	    &map->pseudo_phys_res_id, 0, ~0, size, RF_ACTIVE);
+	if (map->pseudo_phys_res == NULL) {
+		free(map, M_PRIVCMD);
+		return (ENOMEM);
+	}
+
+	map->phys_base_addr = rman_get_start(map->pseudo_phys_res);
+
+	error = vm_phys_fictitious_reg_range(map->phys_base_addr,
+	    map->phys_base_addr + size, VM_MEMATTR_DEFAULT);
+	if (error) {
+		bus_release_resource(privcmd_dev, SYS_RES_MEMORY,
+		    map->pseudo_phys_res_id, map->pseudo_phys_res);
+		free(map, M_PRIVCMD);
+		return (error);
+	}
+
+	map->mem = cdev_pager_allocate(map, OBJT_MGTDEVICE, &privcmd_pg_ops,
+	    size, nprot, *offset, NULL);
+	if (map->mem == NULL) {
+		bus_release_resource(privcmd_dev, SYS_RES_MEMORY,
+		    map->pseudo_phys_res_id, map->pseudo_phys_res);
+		free(map, M_PRIVCMD);
+		return (ENOMEM);
+	}
+
+	*object = map->mem;
+
+	return (0);
+}
+
+static int
+privcmd_ioctl(struct cdev *dev, unsigned long cmd, caddr_t arg,
+    int mode, struct thread *td)
+{
+	int error, i;
+
+	switch (cmd) {
+	case IOCTL_PRIVCMD_HYPERCALL: {
+		struct ioctl_privcmd_hypercall *hcall;
+
+		hcall = (struct ioctl_privcmd_hypercall *)arg;
+
+		error = privcmd_hypercall(hcall->op, hcall->arg[0],
+		    hcall->arg[1], hcall->arg[2], hcall->arg[3], hcall->arg[4]);
+		if (error >= 0) {
+			hcall->retval = error;
+			error = 0;
+		} else {
+			error = xen_translate_error(error);
+			hcall->retval = 0;
+		}
+		break;
+	}
+	case IOCTL_PRIVCMD_MMAPBATCH: {
+		struct ioctl_privcmd_mmapbatch *mmap;
+		vm_map_t map;
+		vm_map_entry_t entry;
+		vm_object_t mem;
+		vm_pindex_t index;
+		vm_prot_t prot;
+		boolean_t wired;
+		struct xen_add_to_physmap_range add;
+		xen_ulong_t *idxs;
+		xen_pfn_t *gpfns;
+		int *errs;
+		struct privcmd_map *umap;
+
+		mmap = (struct ioctl_privcmd_mmapbatch *)arg;
+
+		if ((mmap->num == 0) ||
+		    ((mmap->addr & PAGE_MASK) != 0)) {
+			error = EINVAL;
+			break;
+		}
+
+		map = &td->td_proc->p_vmspace->vm_map;
+		error = vm_map_lookup(&map, mmap->addr, VM_PROT_NONE, &entry,
+		    &mem, &index, &prot, &wired);
+		if (error != KERN_SUCCESS) {
+			error = EINVAL;
+			break;
+		}
+		if ((entry->start != mmap->addr) ||
+		    (entry->end != mmap->addr + (mmap->num * PAGE_SIZE))) {
+			vm_map_lookup_done(map, entry);
+			error = EINVAL;
+			break;
+		}
+		vm_map_lookup_done(map, entry);
+		if ((mem->type != OBJT_MGTDEVICE) ||
+		    (mem->un_pager.devp.ops != &privcmd_pg_ops)) {
+			error = EINVAL;
+			break;
+		}
+		umap = mem->handle;
+
+		add.domid = DOMID_SELF;
+		add.space = XENMAPSPACE_gmfn_foreign;
+		add.size = mmap->num;
+		add.foreign_domid = mmap->dom;
+
+		idxs = malloc(sizeof(*idxs) * mmap->num, M_PRIVCMD,
+		    M_WAITOK | M_ZERO);
+		gpfns = malloc(sizeof(*gpfns) * mmap->num, M_PRIVCMD,
+		    M_WAITOK | M_ZERO);
+		errs = malloc(sizeof(*errs) * mmap->num, M_PRIVCMD,
+		    M_WAITOK | M_ZERO);
+
+		set_xen_guest_handle(add.idxs, idxs);
+		set_xen_guest_handle(add.gpfns, gpfns);
+		set_xen_guest_handle(add.errs, errs);
+
+		error = copyin(&mmap->arr[0], idxs,
+		    sizeof(idxs[0]) * mmap->num);
+		if (error != 0)
+			goto mmap_out;
+
+		for (i = 0; i < mmap->num; i++)
+			gpfns[i] = atop(umap->phys_base_addr + i * PAGE_SIZE);
+
+		error = HYPERVISOR_memory_op(XENMEM_add_to_physmap_range, &add);
+		if (error) {
+			error = xen_translate_error(error);
+			goto mmap_out;
+		}
+
+		for (i = 0; i < mmap->num; i++) {
+			if (errs[i] != 0)
+				errs[i] = xen_translate_error(errs[i]);
+		}
+
+		/*
+		 * Save errs, so we know which pages have been
+		 * successfully mapped.
+		 */
+		umap->errs = errs;
+		umap->mapped = true;
+
+		error = copyout(errs, &mmap->err[0],
+		    sizeof(errs[0]) * mmap->num);
+
+mmap_out:
+		free(idxs, M_PRIVCMD);
+		free(gpfns, M_PRIVCMD);
+		if (!umap->mapped)
+			free(errs, M_PRIVCMD);
+
+		break;
+	}
+
+	default:
+		error = ENOSYS;
+		break;
+	}
+
+	return (error);
+}
+
+/*------------------ Private Device Attachment Functions --------------------*/
+static void
+privcmd_identify(driver_t *driver, device_t parent)
+{
+
+	KASSERT(xen_domain(),
+	    ("Trying to attach privcmd device on non Xen domain"));
+
+	if (BUS_ADD_CHILD(parent, 0, "privcmd", 0) == NULL)
+		panic("unable to attach privcmd user-space device");
+}
+
+static int
+privcmd_probe(device_t dev)
+{
+
+	privcmd_dev = dev;
+	device_set_desc(dev, "Xen privileged interface user-space device");
+	return (BUS_PROBE_NOWILDCARD);
+}
+
+static int
+privcmd_attach(device_t dev)
+{
+
+	make_dev_credf(MAKEDEV_ETERNAL, &privcmd_devsw, 0, NULL, UID_ROOT,
+	    GID_WHEEL, 0600, "xen/privcmd");
+	return (0);
+}
+
+/*-------------------- Private Device Attachment Data -----------------------*/
+static device_method_t privcmd_methods[] = {
+	DEVMETHOD(device_identify,	privcmd_identify),
+	DEVMETHOD(device_probe,		privcmd_probe),
+	DEVMETHOD(device_attach,	privcmd_attach),
+
+	DEVMETHOD_END
+};
+
+static driver_t privcmd_driver = {
+	"privcmd",
+	privcmd_methods,
+	0,
+};
+
+devclass_t privcmd_devclass;
+
+DRIVER_MODULE(privcmd, xenpv, privcmd_driver, privcmd_devclass, 0, 0);
+MODULE_DEPEND(privcmd, xenpv, 1, 1, 1);
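For context only (not part of the committed diff): a privileged user-space consumer opens the /dev/xen/privcmd node created by privcmd_attach() and drives it through the ioctls handled above. The minimal sketch below issues a XENVER_version hypercall via IOCTL_PRIVCMD_HYPERCALL; the field names of struct ioctl_privcmd_hypercall (op, arg[], retval) come from the handler above, while the user-space include path and the __HYPERVISOR_xen_version/XENVER_version constants are assumptions taken from the Xen public interface headers.

	/* Illustrative sketch only; assumes a privileged (control) domain. */
	#include <sys/ioctl.h>

	#include <fcntl.h>
	#include <stdio.h>
	#include <unistd.h>

	#include <xen/privcmd.h>	/* IOCTL_PRIVCMD_HYPERCALL; install path assumed */

	#define	__HYPERVISOR_xen_version	17	/* Xen public interface constant */
	#define	XENVER_version			0

	int
	main(void)
	{
		struct ioctl_privcmd_hypercall hcall = { 0 };
		int fd;

		fd = open("/dev/xen/privcmd", O_RDWR);
		if (fd < 0) {
			perror("open");
			return (1);
		}

		hcall.op = __HYPERVISOR_xen_version;
		hcall.arg[0] = XENVER_version;

		/* On success the driver stores the hypercall result in retval. */
		if (ioctl(fd, IOCTL_PRIVCMD_HYPERCALL, &hcall) == 0)
			printf("Xen version: %ld.%ld\n",
			    (long)(hcall.retval >> 16), (long)(hcall.retval & 0xffff));
		else
			perror("ioctl");

		close(fd);
		return (0);
	}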