author    Peter Grehan <grehan@FreeBSD.org>  2012-04-14 05:48:04 +0000
committer Peter Grehan <grehan@FreeBSD.org>  2012-04-14 05:48:04 +0000
commit    b8a587074f23824a41ce19a0c8dee19ad0507062 (patch)
tree      db02e2bdce1e9ff52725c3e0ed2217280536d6ea /sys/dev
parent    27ce86b8b6261fa388a66ca271f02b21dea41a39 (diff)
Diffstat (limited to 'sys/dev')
-rw-r--r--  sys/dev/virtio/balloon/virtio_balloon.c    7
-rw-r--r--  sys/dev/virtio/block/virtio_blk.c         237
-rw-r--r--  sys/dev/virtio/network/if_vtnet.c           2
-rw-r--r--  sys/dev/virtio/pci/virtio_pci.c             2
-rw-r--r--  sys/dev/virtio/virtio_ring.h               23
-rw-r--r--  sys/dev/virtio/virtqueue.c                 84
-rw-r--r--  sys/dev/virtio/virtqueue.h                  1
7 files changed, 254 insertions(+), 102 deletions(-)
diff --git a/sys/dev/virtio/balloon/virtio_balloon.c b/sys/dev/virtio/balloon/virtio_balloon.c
index 61ae4b127730..d589a733c25c 100644
--- a/sys/dev/virtio/balloon/virtio_balloon.c
+++ b/sys/dev/virtio/balloon/virtio_balloon.c
@@ -122,6 +122,9 @@ static void vtballoon_add_sysctl(struct vtballoon_softc *);
*/
#define VTBALLOON_PAGES_PER_REQUEST 256
+/* Must be able to fit all page frames in one page (segment). */
+CTASSERT(VTBALLOON_PAGES_PER_REQUEST * sizeof(uint32_t) <= PAGE_SIZE);
+
#define VTBALLOON_MTX(_sc) &(_sc)->vtballoon_mtx
#define VTBALLOON_LOCK_INIT(_sc, _name) mtx_init(VTBALLOON_MTX((_sc)), _name, \
"VirtIO Balloon Lock", MTX_SPIN)
@@ -138,7 +141,7 @@ static device_method_t vtballoon_methods[] = {
/* VirtIO methods. */
DEVMETHOD(virtio_config_change, vtballoon_config_change),
- { 0, 0 }
+ DEVMETHOD_END
};
static driver_t vtballoon_driver = {
@@ -402,13 +405,13 @@ vtballoon_send_page_frames(struct vtballoon_softc *sc, struct virtqueue *vq,
error = virtqueue_enqueue(vq, vq, &sg, 1, 0);
KASSERT(error == 0, ("error enqueuing page frames to virtqueue"));
+ virtqueue_notify(vq);
/*
* Inflate and deflate operations are done synchronously. The
* interrupt handler will wake us up.
*/
VTBALLOON_LOCK(sc);
- virtqueue_notify(vq);
while ((c = virtqueue_dequeue(vq, NULL)) == NULL)
msleep_spin(sc, VTBALLOON_MTX(sc), "vtbspf", 0);
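The notify now happens before the spin mutex is taken: kicking the host may cause a VM exit, and the kick itself does not need the balloon lock. The comment's synchronous model relies on the virtqueue interrupt waking the sleeping thread. A minimal sketch of that completion side, which is not part of this diff (the handler name and body are an assumption inferred from the msleep_spin() channel above):

	/* Sketch: virtqueue interrupt handler for the balloon device. */
	static void
	vtballoon_vq_intr(void *xsc)
	{
		struct vtballoon_softc *sc = xsc;

		VTBALLOON_LOCK(sc);
		wakeup(sc);		/* pairs with msleep_spin(sc, ...) above */
		VTBALLOON_UNLOCK(sc);
	}
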
diff --git a/sys/dev/virtio/block/virtio_blk.c b/sys/dev/virtio/block/virtio_blk.c
index dca19ba80239..9442a6ddd7e7 100644
--- a/sys/dev/virtio/block/virtio_blk.c
+++ b/sys/dev/virtio/block/virtio_blk.c
@@ -70,8 +70,8 @@ struct vtblk_softc {
uint32_t vtblk_flags;
#define VTBLK_FLAG_INDIRECT 0x0001
#define VTBLK_FLAG_READONLY 0x0002
-#define VTBLK_FLAG_DETACHING 0x0004
-#define VTBLK_FLAG_SUSPENDED 0x0008
+#define VTBLK_FLAG_DETACH 0x0004
+#define VTBLK_FLAG_SUSPEND 0x0008
#define VTBLK_FLAG_DUMPING 0x0010
struct virtqueue *vtblk_vq;
@@ -82,7 +82,7 @@ struct vtblk_softc {
TAILQ_HEAD(, vtblk_request)
vtblk_req_free;
TAILQ_HEAD(, vtblk_request)
- vtblk_req_ready;
+ vtblk_req_ready;
struct taskqueue *vtblk_tq;
struct task vtblk_intr_task;
@@ -116,6 +116,13 @@ static int vtblk_suspend(device_t);
static int vtblk_resume(device_t);
static int vtblk_shutdown(device_t);
+static int vtblk_open(struct disk *);
+static int vtblk_close(struct disk *);
+static int vtblk_ioctl(struct disk *, u_long, void *, int,
+ struct thread *);
+static int vtblk_dump(void *, void *, vm_offset_t, off_t, size_t);
+static void vtblk_strategy(struct bio *);
+
static void vtblk_negotiate_features(struct vtblk_softc *);
static int vtblk_maximum_segments(struct vtblk_softc *,
struct virtio_blk_config *);
@@ -124,13 +131,7 @@ static void vtblk_alloc_disk(struct vtblk_softc *,
struct virtio_blk_config *);
static void vtblk_create_disk(struct vtblk_softc *);
-static int vtblk_open(struct disk *);
-static int vtblk_close(struct disk *);
-static int vtblk_ioctl(struct disk *, u_long, void *, int,
- struct thread *);
-static int vtblk_dump(void *, void *, vm_offset_t, off_t, size_t);
-static void vtblk_strategy(struct bio *);
-
+static int vtblk_quiesce(struct vtblk_softc *);
static void vtblk_startio(struct vtblk_softc *);
static struct vtblk_request * vtblk_bio_request(struct vtblk_softc *);
static int vtblk_execute_request(struct vtblk_softc *,
@@ -148,6 +149,7 @@ static int vtblk_flush_dump(struct vtblk_softc *);
static int vtblk_poll_request(struct vtblk_softc *,
struct vtblk_request *);
+static void vtblk_finish_completed(struct vtblk_softc *);
static void vtblk_drain_vq(struct vtblk_softc *, int);
static void vtblk_drain(struct vtblk_softc *);
@@ -161,7 +163,8 @@ static struct vtblk_request * vtblk_dequeue_ready(struct vtblk_softc *);
static void vtblk_enqueue_ready(struct vtblk_softc *,
struct vtblk_request *);
-static void vtblk_bio_error(struct bio *, int);
+static int vtblk_request_error(struct vtblk_request *);
+static void vtblk_finish_bio(struct bio *, int);
/* Tunables. */
static int vtblk_no_ident = 0;
@@ -189,9 +192,8 @@ TUNABLE_INT("hw.vtblk.no_ident", &vtblk_no_ident);
#define VTBLK_LOCK_ASSERT_NOTOWNED(_sc) \
mtx_assert(VTBLK_MTX((_sc)), MA_NOTOWNED)
-#define VTBLK_BIO_SEGMENTS(_bp) sglist_count((_bp)->bio_data, (_bp)->bio_bcount)
-
#define VTBLK_DISK_NAME "vtbd"
+#define VTBLK_QUIESCE_TIMEOUT (30 * hz)
/*
* Each block request uses at least two segments - one for the header
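(The comment is truncated by the diff context; in the source it continues: the second fixed segment carries the status byte the device writes back, which is why VTBLK_MIN_SEGMENTS is 2.) For reference, each request on the ring has this shape per the virtio block specification; struct virtio_blk_outhdr corresponds to the vbr_hdr filled in by vtblk_bio_request() below:

	struct virtio_blk_outhdr {	/* segment 0: device-readable header */
		uint32_t type;		/* VIRTIO_BLK_T_IN/OUT/FLUSH/... */
		uint32_t ioprio;
		uint64_t sector;	/* always in 512-byte units */
	};
	/* segments 1..n-2: the data pages (device-writable for reads)  */
	/* segment n-1: one device-writable status byte, VIRTIO_BLK_S_* */
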
@@ -210,7 +212,7 @@ static device_method_t vtblk_methods[] = {
DEVMETHOD(device_resume, vtblk_resume),
DEVMETHOD(device_shutdown, vtblk_shutdown),
- { 0, 0 }
+ DEVMETHOD_END
};
static driver_t vtblk_driver = {
@@ -314,11 +316,13 @@ vtblk_attach(device_t dev)
}
sc->vtblk_max_nsegs = vtblk_maximum_segments(sc, &blkcfg);
+ if (sc->vtblk_max_nsegs <= VTBLK_MIN_SEGMENTS) {
+ error = EINVAL;
+ device_printf(dev, "fewer than minimum number of segments "
+ "allowed: %d\n", sc->vtblk_max_nsegs);
+ goto fail;
+ }
- /*
- * Allocate working sglist. The number of segments may be too
- * large to safely store on the stack.
- */
sc->vtblk_sglist = sglist_alloc(sc->vtblk_max_nsegs, M_NOWAIT);
if (sc->vtblk_sglist == NULL) {
error = ENOMEM;
@@ -376,7 +380,7 @@ vtblk_detach(device_t dev)
sc = device_get_softc(dev);
VTBLK_LOCK(sc);
- sc->vtblk_flags |= VTBLK_FLAG_DETACHING;
+ sc->vtblk_flags |= VTBLK_FLAG_DETACH;
if (device_is_attached(dev))
vtblk_stop(sc);
VTBLK_UNLOCK(sc);
@@ -408,15 +412,19 @@ static int
vtblk_suspend(device_t dev)
{
struct vtblk_softc *sc;
+ int error;
sc = device_get_softc(dev);
VTBLK_LOCK(sc);
- sc->vtblk_flags |= VTBLK_FLAG_SUSPENDED;
- /* TODO Wait for any inflight IO to complete? */
+ sc->vtblk_flags |= VTBLK_FLAG_SUSPEND;
+ /* XXX BMV: virtio_stop(), etc needed here? */
+ error = vtblk_quiesce(sc);
+ if (error)
+ sc->vtblk_flags &= ~VTBLK_FLAG_SUSPEND;
VTBLK_UNLOCK(sc);
- return (0);
+ return (error);
}
static int
@@ -427,8 +435,9 @@ vtblk_resume(device_t dev)
sc = device_get_softc(dev);
VTBLK_LOCK(sc);
- sc->vtblk_flags &= ~VTBLK_FLAG_SUSPENDED;
- /* TODO Resume IO? */
+ /* XXX BMV: virtio_reinit(), etc needed here? */
+ sc->vtblk_flags &= ~VTBLK_FLAG_SUSPEND;
+ vtblk_startio(sc);
VTBLK_UNLOCK(sc);
return (0);
@@ -449,7 +458,7 @@ vtblk_open(struct disk *dp)
if ((sc = dp->d_drv1) == NULL)
return (ENXIO);
- return (sc->vtblk_flags & VTBLK_FLAG_DETACHING ? ENXIO : 0);
+ return (sc->vtblk_flags & VTBLK_FLAG_DETACH ? ENXIO : 0);
}
static int
@@ -489,6 +498,8 @@ vtblk_dump(void *arg, void *virtual, vm_offset_t physical, off_t offset,
if ((sc = dp->d_drv1) == NULL)
return (ENXIO);
+ VTBLK_LOCK(sc);
+
if ((sc->vtblk_flags & VTBLK_FLAG_DUMPING) == 0) {
vtblk_prepare_dump(sc);
sc->vtblk_flags |= VTBLK_FLAG_DUMPING;
@@ -498,6 +509,10 @@ vtblk_dump(void *arg, void *virtual, vm_offset_t physical, off_t offset,
error = vtblk_write_dump(sc, virtual, offset, length);
else if (virtual == NULL && offset == 0)
error = vtblk_flush_dump(sc);
+ else {
+ error = EINVAL;
+ sc->vtblk_flags &= ~VTBLK_FLAG_DUMPING;
+ }
VTBLK_UNLOCK(sc);
@@ -510,7 +525,7 @@ vtblk_strategy(struct bio *bp)
struct vtblk_softc *sc;
if ((sc = bp->bio_disk->d_drv1) == NULL) {
- vtblk_bio_error(bp, EINVAL);
+ vtblk_finish_bio(bp, EINVAL);
return;
}
@@ -520,29 +535,37 @@ vtblk_strategy(struct bio *bp)
*/
if (sc->vtblk_flags & VTBLK_FLAG_READONLY &&
(bp->bio_cmd == BIO_WRITE || bp->bio_cmd == BIO_FLUSH)) {
- vtblk_bio_error(bp, EROFS);
+ vtblk_finish_bio(bp, EROFS);
return;
}
+#ifdef INVARIANTS
/*
* Prevent read/write buffers spanning too many segments from
* getting into the queue. This should only trip if d_maxsize
* was incorrectly set.
*/
if (bp->bio_cmd == BIO_READ || bp->bio_cmd == BIO_WRITE) {
- KASSERT(VTBLK_BIO_SEGMENTS(bp) <= sc->vtblk_max_nsegs -
- VTBLK_MIN_SEGMENTS,
+ int nsegs, max_nsegs;
+
+ nsegs = sglist_count(bp->bio_data, bp->bio_bcount);
+ max_nsegs = sc->vtblk_max_nsegs - VTBLK_MIN_SEGMENTS;
+
+ KASSERT(nsegs <= max_nsegs,
("bio spanned too many segments: %d, max: %d",
- VTBLK_BIO_SEGMENTS(bp),
- sc->vtblk_max_nsegs - VTBLK_MIN_SEGMENTS));
+ nsegs, max_nsegs));
}
+#endif
VTBLK_LOCK(sc);
- if ((sc->vtblk_flags & VTBLK_FLAG_DETACHING) == 0) {
+ if (sc->vtblk_flags & VTBLK_FLAG_DETACH)
+ vtblk_finish_bio(bp, ENXIO);
+ else {
bioq_disksort(&sc->vtblk_bioq, bp);
- vtblk_startio(sc);
- } else
- vtblk_bio_error(bp, ENXIO);
+
+ if ((sc->vtblk_flags & VTBLK_FLAG_SUSPEND) == 0)
+ vtblk_startio(sc);
+ }
VTBLK_UNLOCK(sc);
}
@@ -669,6 +692,26 @@ vtblk_create_disk(struct vtblk_softc *sc)
disk_create(dp, DISK_VERSION);
}
+static int
+vtblk_quiesce(struct vtblk_softc *sc)
+{
+ int error;
+
+ error = 0;
+
+ VTBLK_LOCK_ASSERT(sc);
+
+ while (!virtqueue_empty(sc->vtblk_vq)) {
+ if (mtx_sleep(&sc->vtblk_vq, VTBLK_MTX(sc), PRIBIO, "vtblkq",
+ VTBLK_QUIESCE_TIMEOUT) == EWOULDBLOCK) {
+ error = EBUSY;
+ break;
+ }
+ }
+
+ return (error);
+}
+
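vtblk_quiesce() sleeps on the virtqueue address until every in-flight request has been retired; its counterpart is the wakeup() added to vtblk_intr_task() below. Condensed, the handshake between the two contexts (both fragments appear in full in this diff) is:

	/* Suspend side (vtblk_suspend -> vtblk_quiesce), under the lock: */
	while (!virtqueue_empty(sc->vtblk_vq))
		if (mtx_sleep(&sc->vtblk_vq, VTBLK_MTX(sc), PRIBIO, "vtblkq",
		    VTBLK_QUIESCE_TIMEOUT) == EWOULDBLOCK)
			return (EBUSY);	/* requests still outstanding */

	/* Completion side (vtblk_intr_task), after retiring requests: */
	if ((sc->vtblk_flags & VTBLK_FLAG_SUSPEND) == 0)
		vtblk_startio(sc);	/* normal: issue more I/O */
	else
		wakeup(&sc->vtblk_vq);	/* quiescing: nudge the sleeper */
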
static void
vtblk_startio(struct vtblk_softc *sc)
{
@@ -681,9 +724,6 @@ vtblk_startio(struct vtblk_softc *sc)
VTBLK_LOCK_ASSERT(sc);
- if (sc->vtblk_flags & VTBLK_FLAG_SUSPENDED)
- return;
-
while (!virtqueue_full(vq)) {
if ((req = vtblk_dequeue_ready(sc)) == NULL)
req = vtblk_bio_request(sc);
@@ -736,9 +776,8 @@ vtblk_bio_request(struct vtblk_softc *sc)
req->vbr_hdr.sector = bp->bio_offset / 512;
break;
default:
- KASSERT(0, ("bio with unhandled cmd: %d", bp->bio_cmd));
- req->vbr_hdr.type = -1;
- break;
+ panic("%s: bio with unhandled cmd: %d", __FUNCTION__,
+ bp->bio_cmd);
}
if (bp->bio_flags & BIO_ORDERED)
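The sector is computed with a plain division by 512 because virtio block sectors are always 512-byte units, regardless of the sector size the disk advertises. A worked example with hypothetical request values:

	/* A 4 KiB BIO_WRITE at byte offset 8192 becomes: */
	req->vbr_hdr.type = VIRTIO_BLK_T_OUT;
	req->vbr_hdr.sector = 8192 / 512;  /* sector 16; 4 KiB spans sectors 16-23 */
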
@@ -752,7 +791,7 @@ vtblk_execute_request(struct vtblk_softc *sc, struct vtblk_request *req)
{
struct sglist *sg;
struct bio *bp;
- int writable, error;
+ int readable, writable, error;
sg = sc->vtblk_sglist;
bp = req->vbr_bp;
@@ -783,10 +822,9 @@ vtblk_execute_request(struct vtblk_softc *sc, struct vtblk_request *req)
KASSERT(sg->sg_nseg >= VTBLK_MIN_SEGMENTS,
("fewer than min segments: %d", sg->sg_nseg));
- error = virtqueue_enqueue(sc->vtblk_vq, req, sg,
- sg->sg_nseg - writable, writable);
+ readable = sg->sg_nseg - writable;
- return (error);
+ return (virtqueue_enqueue(sc->vtblk_vq, req, sg, readable, writable));
}
static int
@@ -806,37 +844,23 @@ static void
vtblk_intr_task(void *arg, int pending)
{
struct vtblk_softc *sc;
- struct vtblk_request *req;
struct virtqueue *vq;
- struct bio *bp;
sc = arg;
vq = sc->vtblk_vq;
VTBLK_LOCK(sc);
- if (sc->vtblk_flags & VTBLK_FLAG_DETACHING) {
+ if (sc->vtblk_flags & VTBLK_FLAG_DETACH) {
VTBLK_UNLOCK(sc);
return;
}
- while ((req = virtqueue_dequeue(vq, NULL)) != NULL) {
- bp = req->vbr_bp;
-
- if (req->vbr_ack == VIRTIO_BLK_S_OK)
- bp->bio_resid = 0;
- else {
- bp->bio_flags |= BIO_ERROR;
- if (req->vbr_ack == VIRTIO_BLK_S_UNSUPP)
- bp->bio_error = ENOTSUP;
- else
- bp->bio_error = EIO;
- }
-
- biodone(bp);
- vtblk_enqueue_request(sc, req);
- }
+ vtblk_finish_completed(sc);
- vtblk_startio(sc);
+ if ((sc->vtblk_flags & VTBLK_FLAG_SUSPEND) == 0)
+ vtblk_startio(sc);
+ else
+ wakeup(&sc->vtblk_vq);
if (virtqueue_enable_intr(vq) != 0) {
virtqueue_disable_intr(vq);
@@ -973,7 +997,6 @@ vtblk_poll_request(struct vtblk_softc *sc, struct vtblk_request *req)
{
device_t dev;
struct virtqueue *vq;
- struct vtblk_request *r;
int error;
dev = sc->vtblk_dev;
@@ -988,20 +1011,37 @@ vtblk_poll_request(struct vtblk_softc *sc, struct vtblk_request *req)
virtqueue_notify(vq);
- r = virtqueue_poll(vq, NULL);
- KASSERT(r == req, ("unexpected request response"));
+ req = virtqueue_poll(vq, NULL);
- if (req->vbr_ack != VIRTIO_BLK_S_OK) {
- error = req->vbr_ack == VIRTIO_BLK_S_UNSUPP ? ENOTSUP : EIO;
- if (bootverbose)
- device_printf(dev,
- "vtblk_poll_request: IO error: %d\n", error);
+ error = vtblk_request_error(req);
+ if (error && bootverbose) {
+ device_printf(dev, "vtblk_poll_request: IO error: %d\n",
+ error);
}
return (error);
}
static void
+vtblk_finish_completed(struct vtblk_softc *sc)
+{
+ struct vtblk_request *req;
+ struct bio *bp;
+ int error;
+
+ while ((req = virtqueue_dequeue(sc->vtblk_vq, NULL)) != NULL) {
+ bp = req->vbr_bp;
+
+ error = vtblk_request_error(req);
+ if (error)
+ disk_err(bp, "hard error", -1, 1);
+
+ vtblk_finish_bio(bp, error);
+ vtblk_enqueue_request(sc, req);
+ }
+}
+
+static void
vtblk_drain_vq(struct vtblk_softc *sc, int skip_done)
{
struct virtqueue *vq;
@@ -1013,7 +1053,7 @@ vtblk_drain_vq(struct vtblk_softc *sc, int skip_done)
while ((req = virtqueue_drain(vq, &last)) != NULL) {
if (!skip_done)
- vtblk_bio_error(req->vbr_bp, ENXIO);
+ vtblk_finish_bio(req->vbr_bp, ENXIO);
vtblk_enqueue_request(sc, req);
}
@@ -1030,17 +1070,19 @@ vtblk_drain(struct vtblk_softc *sc)
bioq = &sc->vtblk_bioq;
- if (sc->vtblk_vq != NULL)
+ if (sc->vtblk_vq != NULL) {
+ vtblk_finish_completed(sc);
vtblk_drain_vq(sc, 0);
+ }
while ((req = vtblk_dequeue_ready(sc)) != NULL) {
- vtblk_bio_error(req->vbr_bp, ENXIO);
+ vtblk_finish_bio(req->vbr_bp, ENXIO);
vtblk_enqueue_request(sc, req);
}
while (bioq_first(bioq) != NULL) {
bp = bioq_takefirst(bioq);
- vtblk_bio_error(bp, ENXIO);
+ vtblk_finish_bio(bp, ENXIO);
}
vtblk_free_requests(sc);
@@ -1050,9 +1092,9 @@ static int
vtblk_alloc_requests(struct vtblk_softc *sc)
{
struct vtblk_request *req;
- int i, size;
+ int i, nreqs;
- size = virtqueue_size(sc->vtblk_vq);
+ nreqs = virtqueue_size(sc->vtblk_vq);
/*
* Preallocate sufficient requests to keep the virtqueue full. Each
@@ -1060,9 +1102,9 @@ vtblk_alloc_requests(struct vtblk_softc *sc)
* the number allocated when indirect descriptors are not available.
*/
if ((sc->vtblk_flags & VTBLK_FLAG_INDIRECT) == 0)
- size /= VTBLK_MIN_SEGMENTS;
+ nreqs /= VTBLK_MIN_SEGMENTS;
- for (i = 0; i < size; i++) {
+ for (i = 0; i < nreqs; i++) {
req = uma_zalloc(vtblk_req_zone, M_NOWAIT);
if (req == NULL)
return (ENOMEM);
@@ -1079,6 +1121,9 @@ vtblk_free_requests(struct vtblk_softc *sc)
{
struct vtblk_request *req;
+ KASSERT(TAILQ_EMPTY(&sc->vtblk_req_ready),
+ ("ready requests left on queue"));
+
while ((req = vtblk_dequeue_request(sc)) != NULL) {
sc->vtblk_request_count--;
uma_zfree(vtblk_req_zone, req);
@@ -1126,9 +1171,35 @@ vtblk_enqueue_ready(struct vtblk_softc *sc, struct vtblk_request *req)
TAILQ_INSERT_HEAD(&sc->vtblk_req_ready, req, vbr_link);
}
+static int
+vtblk_request_error(struct vtblk_request *req)
+{
+ int error;
+
+ switch (req->vbr_ack) {
+ case VIRTIO_BLK_S_OK:
+ error = 0;
+ break;
+ case VIRTIO_BLK_S_UNSUPP:
+ error = ENOTSUP;
+ break;
+ default:
+ error = EIO;
+ break;
+ }
+
+ return (error);
+}
+
static void
-vtblk_bio_error(struct bio *bp, int error)
+vtblk_finish_bio(struct bio *bp, int error)
{
- biofinish(bp, NULL, error);
+ if (error) {
+ bp->bio_resid = bp->bio_bcount;
+ bp->bio_error = error;
+ bp->bio_flags |= BIO_ERROR;
+ }
+
+ biodone(bp);
}
diff --git a/sys/dev/virtio/network/if_vtnet.c b/sys/dev/virtio/network/if_vtnet.c
index ca57aec1c91e..64a82ac028fe 100644
--- a/sys/dev/virtio/network/if_vtnet.c
+++ b/sys/dev/virtio/network/if_vtnet.c
@@ -223,7 +223,7 @@ static device_method_t vtnet_methods[] = {
/* VirtIO methods. */
DEVMETHOD(virtio_config_change, vtnet_config_change),
- { 0, 0 }
+ DEVMETHOD_END
};
static driver_t vtnet_driver = {
diff --git a/sys/dev/virtio/pci/virtio_pci.c b/sys/dev/virtio/pci/virtio_pci.c
index c71b5a9fe9b1..56813e4a1a7b 100644
--- a/sys/dev/virtio/pci/virtio_pci.c
+++ b/sys/dev/virtio/pci/virtio_pci.c
@@ -189,7 +189,7 @@ static device_method_t vtpci_methods[] = {
DEVMETHOD(virtio_bus_read_device_config, vtpci_read_dev_config),
DEVMETHOD(virtio_bus_write_device_config, vtpci_write_dev_config),
- { 0, 0 }
+ DEVMETHOD_END
};
static driver_t vtpci_driver = {
diff --git a/sys/dev/virtio/virtio_ring.h b/sys/dev/virtio/virtio_ring.h
index b0ab94651a9e..52580856a864 100644
--- a/sys/dev/virtio/virtio_ring.h
+++ b/sys/dev/virtio/virtio_ring.h
@@ -103,6 +103,7 @@ struct vring {
* __u16 avail_flags;
* __u16 avail_idx;
* __u16 available[num];
+ * __u16 used_event_idx;
*
* // Padding to the next align boundary.
* char pad[];
@@ -111,11 +112,19 @@ struct vring {
* __u16 used_flags;
* __u16 used_idx;
* struct vring_used_elem used[num];
+ * __u16 avail_event_idx;
* };
*
* NOTE: for VirtIO PCI, align is 4096.
*/
+/*
+ * We publish the used event index at the end of the available ring, and vice
+ * versa. They are at the end for backwards compatibility.
+ */
+#define vring_used_event(vr) ((vr)->avail->ring[(vr)->num])
+#define vring_avail_event(vr) (*(uint16_t *)&(vr)->used->ring[(vr)->num])
+
static inline int
vring_size(unsigned int num, unsigned long align)
{
@@ -140,4 +149,18 @@ vring_init(struct vring *vr, unsigned int num, uint8_t *p,
vr->used = (void *)
(((unsigned long) &vr->avail->ring[num] + align-1) & ~(align-1));
}
+
+/*
+ * The following is used with VIRTIO_RING_F_EVENT_IDX.
+ *
+ * Assuming a given event_idx value from the other side, if we have
+ * just incremented index from old to new_idx, should we trigger an
+ * event?
+ */
+static inline int
+vring_need_event(uint16_t event_idx, uint16_t new_idx, uint16_t old)
+{
+
+ return (uint16_t)(new_idx - event_idx - 1) < (uint16_t)(new_idx - old);
+}
#endif /* VIRTIO_RING_H */
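
The unsigned 16-bit subtractions make vring_need_event() wraparound-safe: it evaluates true exactly when event_idx lies in the half-open window [old, new_idx) of indices published since the last check. A standalone demonstration with concrete values (a hypothetical test program, not part of the header):

	#include <assert.h>
	#include <stdint.h>

	static inline int
	vring_need_event(uint16_t event_idx, uint16_t new_idx, uint16_t old)
	{

		return (uint16_t)(new_idx - event_idx - 1) <
		    (uint16_t)(new_idx - old);
	}

	int
	main(void)
	{
		assert(vring_need_event(10, 12, 10) == 1); /* 10 in [10,12): kick */
		assert(vring_need_event(11, 12, 10) == 1); /* 11 in [10,12): kick */
		assert(vring_need_event(9, 12, 10) == 0);  /* already passed: skip */
		assert(vring_need_event(12, 12, 10) == 0); /* not yet reached: skip */
		/* Wraparound: [65534, 1) covers 65534, 65535, 0. */
		assert(vring_need_event(65535, 1, 65534) == 1);
		return (0);
	}
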
diff --git a/sys/dev/virtio/virtqueue.c b/sys/dev/virtio/virtqueue.c
index 1fb182e3dae2..31f47d0380dc 100644
--- a/sys/dev/virtio/virtqueue.c
+++ b/sys/dev/virtio/virtqueue.c
@@ -60,6 +60,7 @@ struct virtqueue {
uint16_t vq_nentries;
uint32_t vq_flags;
#define VIRTQUEUE_FLAG_INDIRECT 0x0001
+#define VIRTQUEUE_FLAG_EVENT_IDX 0x0002
int vq_alignment;
int vq_ring_size;
@@ -126,7 +127,8 @@ static uint16_t vq_ring_enqueue_segments(struct virtqueue *,
static int vq_ring_use_indirect(struct virtqueue *, int);
static void vq_ring_enqueue_indirect(struct virtqueue *, void *,
struct sglist *, int, int);
-static void vq_ring_notify_host(struct virtqueue *, int);
+static int vq_ring_must_notify_host(struct virtqueue *);
+static void vq_ring_notify_host(struct virtqueue *);
static void vq_ring_free_chain(struct virtqueue *, uint16_t);
uint64_t
@@ -136,6 +138,7 @@ virtqueue_filter_features(uint64_t features)
mask = (1 << VIRTIO_TRANSPORT_F_START) - 1;
mask |= VIRTIO_RING_F_INDIRECT_DESC;
+ mask |= VIRTIO_RING_F_EVENT_IDX;
return (features & mask);
}
@@ -184,6 +187,9 @@ virtqueue_alloc(device_t dev, uint16_t queue, uint16_t size, int align,
vq->vq_intrhand = info->vqai_intr;
vq->vq_intrhand_arg = info->vqai_intr_arg;
+ if (VIRTIO_BUS_WITH_FEATURE(dev, VIRTIO_RING_F_EVENT_IDX) != 0)
+ vq->vq_flags |= VIRTQUEUE_FLAG_EVENT_IDX;
+
if (info->vqai_maxindirsz > 1) {
error = virtqueue_init_indirect(vq, info->vqai_maxindirsz);
if (error)
@@ -384,9 +390,12 @@ virtqueue_full(struct virtqueue *vq)
void
virtqueue_notify(struct virtqueue *vq)
{
+ /* Ensure updated avail->idx is visible to host. */
+ mb();
+ if (vq_ring_must_notify_host(vq))
+ vq_ring_notify_host(vq);
vq->vq_queued_cnt = 0;
- vq_ring_notify_host(vq, 0);
}
int
@@ -395,11 +404,8 @@ virtqueue_nused(struct virtqueue *vq)
uint16_t used_idx, nused;
used_idx = vq->vq_ring.used->idx;
- if (used_idx >= vq->vq_used_cons_idx)
- nused = used_idx - vq->vq_used_cons_idx;
- else
- nused = UINT16_MAX - vq->vq_used_cons_idx +
- used_idx + 1;
+
+ nused = (uint16_t)(used_idx - vq->vq_used_cons_idx);
VQASSERT(vq, nused <= vq->vq_nentries, "used more than available");
return (nused);
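The simplification works because both indices are free-running uint16_t counters that are allowed to wrap: a single unsigned subtraction yields the pending count in every case the old two-branch code handled. For instance, with hypothetical values:

	uint16_t used_idx = 3;		/* host index wrapped past 65535 */
	uint16_t cons_idx = 65534;	/* guest consumer index */
	/* (uint16_t)(3 - 65534) == 5: entries 65534, 65535, 0, 1, 2 pending. */
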
@@ -427,6 +433,10 @@ virtqueue_enable_intr(struct virtqueue *vq)
* index of what's already been consumed.
*/
vq->vq_ring.avail->flags &= ~VRING_AVAIL_F_NO_INTERRUPT;
+ if (vq->vq_flags & VIRTQUEUE_FLAG_EVENT_IDX)
+ vring_used_event(&vq->vq_ring) = vq->vq_used_cons_idx;
+ else
+ vq->vq_ring.avail->flags &= ~VRING_AVAIL_F_NO_INTERRUPT;
mb();
@@ -441,6 +451,37 @@ virtqueue_enable_intr(struct virtqueue *vq)
return (0);
}
+int
+virtqueue_postpone_intr(struct virtqueue *vq)
+{
+ uint16_t ndesc;
+
+ /*
+ * Postpone until at least half of the available descriptors
+ * have been consumed.
+ *
+ * XXX Adaptive factor? (Linux uses 3/4)
+ */
+ ndesc = (uint16_t)(vq->vq_ring.avail->idx - vq->vq_used_cons_idx) / 2;
+
+ if (vq->vq_flags & VIRTQUEUE_FLAG_EVENT_IDX)
+ vring_used_event(&vq->vq_ring) = vq->vq_used_cons_idx + ndesc;
+ else
+ vq->vq_ring.avail->flags &= ~VRING_AVAIL_F_NO_INTERRUPT;
+
+ mb();
+
+ /*
+ * Enough items may have already been consumed to meet our
+ * threshold since we last checked. Let our caller know so
+ * it processes the new entries.
+ */
+ if (virtqueue_nused(vq) > ndesc)
+ return (1);
+
+ return (0);
+}
+
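With VIRTIO_RING_F_EVENT_IDX negotiated, postponing an interrupt is just a matter of publishing a later used_event value; the host suppresses interrupts until its used index moves past it. Worked numbers for the halfway heuristic above, assuming a hypothetical queue state:

	/* avail->idx = 100, vq_used_cons_idx = 60: 40 descriptors outstanding. */
	ndesc = (uint16_t)(100 - 60) / 2;		/* 20 */
	vring_used_event(&vq->vq_ring) = 60 + 20;	/* quiet until used idx passes 80 */

The virtqueue_nused() check afterwards closes the race where the host had already consumed past the new threshold before it was published.
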
void
virtqueue_disable_intr(struct virtqueue *vq)
{
@@ -448,7 +489,8 @@ virtqueue_disable_intr(struct virtqueue *vq)
/*
* Note this is only considered a hint to the host.
*/
- vq->vq_ring.avail->flags |= VRING_AVAIL_F_NO_INTERRUPT;
+ if ((vq->vq_flags & VIRTQUEUE_FLAG_EVENT_IDX) == 0)
+ vq->vq_ring.avail->flags |= VRING_AVAIL_F_NO_INTERRUPT;
}
int
@@ -618,7 +660,7 @@ vq_ring_update_avail(struct virtqueue *vq, uint16_t desc_idx)
mb();
vq->vq_ring.avail->idx++;
- /* Keep pending count until virtqueue_notify() for debugging. */
+ /* Keep pending count until virtqueue_notify(). */
vq->vq_queued_cnt++;
}
@@ -709,15 +751,27 @@ vq_ring_enqueue_indirect(struct virtqueue *vq, void *cookie,
vq_ring_update_avail(vq, head_idx);
}
-static void
-vq_ring_notify_host(struct virtqueue *vq, int force)
+static int
+vq_ring_must_notify_host(struct virtqueue *vq)
{
+ uint16_t new_idx, prev_idx, event_idx;
- mb();
+ if (vq->vq_flags & VIRTQUEUE_FLAG_EVENT_IDX) {
+ new_idx = vq->vq_ring.avail->idx;
+ prev_idx = new_idx - vq->vq_queued_cnt;
+ event_idx = vring_avail_event(&vq->vq_ring);
+
+ return (vring_need_event(event_idx, new_idx, prev_idx) != 0);
+ }
+
+ return ((vq->vq_ring.used->flags & VRING_USED_F_NO_NOTIFY) == 0);
+}
+
+static void
+vq_ring_notify_host(struct virtqueue *vq)
+{
- if (force ||
- (vq->vq_ring.used->flags & VRING_USED_F_NO_NOTIFY) == 0)
- VIRTIO_BUS_NOTIFY_VQ(vq->vq_dev, vq->vq_queue_index);
+ VIRTIO_BUS_NOTIFY_VQ(vq->vq_dev, vq->vq_queue_index);
}
static void
diff --git a/sys/dev/virtio/virtqueue.h b/sys/dev/virtio/virtqueue.h
index e790e65d51a8..eab57b22a23d 100644
--- a/sys/dev/virtio/virtqueue.h
+++ b/sys/dev/virtio/virtqueue.h
@@ -78,6 +78,7 @@ int virtqueue_reinit(struct virtqueue *vq, uint16_t size);
int virtqueue_intr(struct virtqueue *vq);
int virtqueue_enable_intr(struct virtqueue *vq);
+int virtqueue_postpone_intr(struct virtqueue *vq);
void virtqueue_disable_intr(struct virtqueue *vq);
/* Get physical address of the virtqueue ring. */