diff options
Diffstat (limited to 'sys/contrib/openzfs/module/os/linux/zfs/zvol_os.c')
| -rw-r--r-- | sys/contrib/openzfs/module/os/linux/zfs/zvol_os.c | 109 | 
1 files changed, 72 insertions, 37 deletions
| diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zvol_os.c b/sys/contrib/openzfs/module/os/linux/zfs/zvol_os.c index 967a018640e1..fe939150b641 100644 --- a/sys/contrib/openzfs/module/os/linux/zfs/zvol_os.c +++ b/sys/contrib/openzfs/module/os/linux/zfs/zvol_os.c @@ -21,7 +21,7 @@   */  /*   * Copyright (c) 2012, 2020 by Delphix. All rights reserved. - * Copyright (c) 2024, Rob Norris <robn@despairlabs.com> + * Copyright (c) 2024, 2025, Rob Norris <robn@despairlabs.com>   * Copyright (c) 2024, 2025, Klara, Inc.   */ @@ -337,16 +337,14 @@ zvol_discard(zv_request_t *zvr)  	}  	/* -	 * Align the request to volume block boundaries when a secure erase is -	 * not required.  This will prevent dnode_free_range() from zeroing out -	 * the unaligned parts which is slow (read-modify-write) and useless -	 * since we are not freeing any space by doing so. +	 * Align the request to volume block boundaries. This will prevent +	 * dnode_free_range() from zeroing out the unaligned parts which is +	 * slow (read-modify-write) and useless since we are not freeing any +	 * space by doing so.  	 */ -	if (!io_is_secure_erase(bio, rq)) { -		start = P2ROUNDUP(start, zv->zv_volblocksize); -		end = P2ALIGN_TYPED(end, zv->zv_volblocksize, uint64_t); -		size = end - start; -	} +	start = P2ROUNDUP(start, zv->zv_volblocksize); +	end = P2ALIGN_TYPED(end, zv->zv_volblocksize, uint64_t); +	size = end - start;  	if (start >= end)  		goto unlock; @@ -467,6 +465,24 @@ zvol_read_task(void *arg)  	zv_request_task_free(task);  } +/* + * Note: + * + * The kernel uses different enum names for the IO opcode, depending on the + * kernel version ('req_opf', 'req_op').  To sidestep this, use macros rather + * than inline functions for these checks. + */ +/* Should this IO go down the zvol write path? */ +#define	ZVOL_OP_IS_WRITE(op) \ +	(op == REQ_OP_WRITE || \ +	op == REQ_OP_FLUSH || \ +	op == REQ_OP_DISCARD) + +/* Is this IO type supported by zvols? */ +#define	ZVOL_OP_IS_SUPPORTED(op) (op == REQ_OP_READ || ZVOL_OP_IS_WRITE(op)) + +/* Get the IO opcode */ +#define	ZVOL_OP(bio, rq) (bio != NULL ? bio_op(bio) : req_op(rq))  /*   * Process a BIO or request @@ -486,27 +502,32 @@ zvol_request_impl(zvol_state_t *zv, struct bio *bio, struct request *rq,  	uint64_t size = io_size(bio, rq);  	int rw; -	if (rq != NULL) { -		/* -		 * Flush & trim requests go down the zvol_write codepath.  Or -		 * more specifically: -		 * -		 * If request is a write, or if it's op_is_sync() and not a -		 * read, or if it's a flush, or if it's a discard, then send the -		 * request down the write path. -		 */ -		if (op_is_write(rq->cmd_flags) || -		    (op_is_sync(rq->cmd_flags) && req_op(rq) != REQ_OP_READ) || -		    req_op(rq) == REQ_OP_FLUSH || -		    op_is_discard(rq->cmd_flags)) { -			rw = WRITE; -		} else { -			rw = READ; -		} +	if (unlikely(!ZVOL_OP_IS_SUPPORTED(ZVOL_OP(bio, rq)))) { +		zfs_dbgmsg("Unsupported zvol %s, op=%d, flags=0x%x", +		    rq != NULL ? "request" : "BIO", +		    ZVOL_OP(bio, rq), +		    rq != NULL ? rq->cmd_flags : bio->bi_opf); +		ASSERT(ZVOL_OP_IS_SUPPORTED(ZVOL_OP(bio, rq))); +		zvol_end_io(bio, rq, SET_ERROR(ENOTSUPP)); +		goto out; +	} + +	if (ZVOL_OP_IS_WRITE(ZVOL_OP(bio, rq))) { +		rw = WRITE;  	} else { -		rw = bio_data_dir(bio); +		rw = READ;  	} +	/* +	 * Sanity check +	 * +	 * If we're a BIO, check our rw matches the kernel's +	 * bio_data_dir(bio) rw.  We need to check because we support fewer +	 * IO operations, and want to verify that what we think are reads and +	 * writes from those operations match what the kernel thinks. +	 */ +	ASSERT(rq != NULL || rw == bio_data_dir(bio)); +  	if (unlikely(zv->zv_flags & ZVOL_REMOVING)) {  		zvol_end_io(bio, rq, SET_ERROR(ENXIO));  		goto out; @@ -610,7 +631,7 @@ zvol_request_impl(zvol_state_t *zv, struct bio *bio, struct request *rq,  		 * interfaces lack this functionality (they block waiting for  		 * the i/o to complete).  		 */ -		if (io_is_discard(bio, rq) || io_is_secure_erase(bio, rq)) { +		if (io_is_discard(bio, rq)) {  			if (force_sync) {  				zvol_discard(&zvr);  			} else { @@ -1011,12 +1032,12 @@ zvol_os_update_volsize(zvol_state_t *zv, uint64_t volsize)   * tiny devices.  For devices over 1 Mib a standard head and sector count   * is used to keep the cylinders count reasonable.   */ -static int -zvol_getgeo(struct block_device *bdev, struct hd_geometry *geo) +static inline int +zvol_getgeo_impl(struct gendisk *disk, struct hd_geometry *geo)  { +	zvol_state_t *zv = atomic_load_ptr(&disk->private_data);  	sector_t sectors; -	zvol_state_t *zv = atomic_load_ptr(&bdev->bd_disk->private_data);  	ASSERT3P(zv, !=, NULL);  	ASSERT3U(zv->zv_open_count, >, 0); @@ -1036,6 +1057,20 @@ zvol_getgeo(struct block_device *bdev, struct hd_geometry *geo)  	return (0);  } +#ifdef HAVE_BLOCK_DEVICE_OPERATIONS_GETGEO_GENDISK +static int +zvol_getgeo(struct gendisk *disk, struct hd_geometry *geo) +{ +	return (zvol_getgeo_impl(disk, geo)); +} +#else +static int +zvol_getgeo(struct block_device *bdev, struct hd_geometry *geo) +{ +	return (zvol_getgeo_impl(bdev->bd_disk, geo)); +} +#endif +  /*   * Why have two separate block_device_operations structs?   * @@ -1479,7 +1514,7 @@ zvol_os_remove_minor(zvol_state_t *zv)  	if (zso->use_blk_mq)  		blk_mq_free_tag_set(&zso->tag_set); -	ida_simple_remove(&zvol_ida, MINOR(zso->zvo_dev) >> ZVOL_MINOR_BITS); +	ida_free(&zvol_ida, MINOR(zso->zvo_dev) >> ZVOL_MINOR_BITS);  	kmem_free(zso, sizeof (struct zvol_state_os)); @@ -1634,7 +1669,7 @@ zvol_os_create_minor(const char *name)  	if (zvol_inhibit_dev)  		return (0); -	idx = ida_simple_get(&zvol_ida, 0, 0, kmem_flags_convert(KM_SLEEP)); +	idx = ida_alloc(&zvol_ida, kmem_flags_convert(KM_SLEEP));  	if (idx < 0)  		return (SET_ERROR(-idx));  	minor = idx << ZVOL_MINOR_BITS; @@ -1642,7 +1677,7 @@ zvol_os_create_minor(const char *name)  		/* too many partitions can cause an overflow */  		zfs_dbgmsg("zvol: create minor overflow: %s, minor %u/%u",  		    name, minor, MINOR(minor)); -		ida_simple_remove(&zvol_ida, idx); +		ida_free(&zvol_ida, idx);  		return (SET_ERROR(EINVAL));  	} @@ -1650,7 +1685,7 @@ zvol_os_create_minor(const char *name)  	if (zv) {  		ASSERT(MUTEX_HELD(&zv->zv_state_lock));  		mutex_exit(&zv->zv_state_lock); -		ida_simple_remove(&zvol_ida, idx); +		ida_free(&zvol_ida, idx);  		return (SET_ERROR(EEXIST));  	} @@ -1750,7 +1785,7 @@ out_doi:  		rw_exit(&zvol_state_lock);  		error = zvol_os_add_disk(zv->zv_zso->zvo_disk);  	} else { -		ida_simple_remove(&zvol_ida, idx); +		ida_free(&zvol_ida, idx);  	}  	return (error); | 
