diff options
author | Martin Matuska <mm@FreeBSD.org> | 2021-02-16 00:39:34 +0000 |
---|---|---|
committer | Martin Matuska <mm@FreeBSD.org> | 2021-02-16 01:46:28 +0000 |
commit | 184c1b943937986c81e1996d999d21626ec7a4ff (patch) | |
tree | f7321df93d0bd5ffb8cf9245c84745dac7e81ce1 /sys/contrib/openzfs/module/os/linux | |
parent | 10fc4c3218381fef7189a5b8d46a757cd1989dff (diff) | |
parent | 83dd4a9252fd2044038a399d7afc68259d483b8e (diff) | |
download | src-184c1b943937986c81e1996d999d21626ec7a4ff.tar.gz src-184c1b943937986c81e1996d999d21626ec7a4ff.zip |
Diffstat (limited to 'sys/contrib/openzfs/module/os/linux')
14 files changed, 474 insertions, 140 deletions
diff --git a/sys/contrib/openzfs/module/os/linux/spl/spl-generic.c b/sys/contrib/openzfs/module/os/linux/spl/spl-generic.c index 1da7618185ec..36fdff72a133 100644 --- a/sys/contrib/openzfs/module/os/linux/spl/spl-generic.c +++ b/sys/contrib/openzfs/module/os/linux/spl/spl-generic.c @@ -284,9 +284,7 @@ int64_t __divdi3(int64_t u, int64_t v) { int64_t q, t; - // cppcheck-suppress shiftTooManyBitsSigned q = __udivdi3(abs64(u), abs64(v)); - // cppcheck-suppress shiftTooManyBitsSigned t = (u ^ v) >> 63; // If u, v have different return ((q ^ t) - t); // signs, negate q. } diff --git a/sys/contrib/openzfs/module/os/linux/spl/spl-kstat.c b/sys/contrib/openzfs/module/os/linux/spl/spl-kstat.c index dbbf72c8569d..c7f1aadf784e 100644 --- a/sys/contrib/openzfs/module/os/linux/spl/spl-kstat.c +++ b/sys/contrib/openzfs/module/os/linux/spl/spl-kstat.c @@ -486,7 +486,7 @@ proc_kstat_open(struct inode *inode, struct file *filp) f = filp->private_data; f->private = PDE_DATA(inode); - return (rc); + return (0); } static ssize_t diff --git a/sys/contrib/openzfs/module/os/linux/spl/spl-taskq.c b/sys/contrib/openzfs/module/os/linux/spl/spl-taskq.c index e8d89bfeabe5..61631256c858 100644 --- a/sys/contrib/openzfs/module/os/linux/spl/spl-taskq.c +++ b/sys/contrib/openzfs/module/os/linux/spl/spl-taskq.c @@ -274,8 +274,6 @@ taskq_lowest_id(taskq_t *tq) taskq_ent_t *t; taskq_thread_t *tqt; - ASSERT(tq); - if (!list_empty(&tq->tq_pend_list)) { t = list_entry(tq->tq_pend_list.next, taskq_ent_t, tqent_list); lowest_id = MIN(lowest_id, t->tqent_id); @@ -995,6 +993,7 @@ error: spin_unlock_irqrestore(&tq->tq_lock, flags); tsd_set(taskq_tsd, NULL); + thread_exit(); return (0); } diff --git a/sys/contrib/openzfs/module/os/linux/zfs/abd_os.c b/sys/contrib/openzfs/module/os/linux/zfs/abd_os.c index 0abac228447f..d82e5f4dcf15 100644 --- a/sys/contrib/openzfs/module/os/linux/zfs/abd_os.c +++ b/sys/contrib/openzfs/module/os/linux/zfs/abd_os.c @@ -185,7 +185,7 @@ abd_chunkcnt_for_bytes(size_t size) } abd_t * -abd_alloc_struct(size_t size) +abd_alloc_struct_impl(size_t size) { /* * In Linux we do not use the size passed in during ABD @@ -193,18 +193,14 @@ abd_alloc_struct(size_t size) */ abd_t *abd = kmem_cache_alloc(abd_cache, KM_PUSHPAGE); ASSERT3P(abd, !=, NULL); - list_link_init(&abd->abd_gang_link); - mutex_init(&abd->abd_mtx, NULL, MUTEX_DEFAULT, NULL); ABDSTAT_INCR(abdstat_struct_size, sizeof (abd_t)); return (abd); } void -abd_free_struct(abd_t *abd) +abd_free_struct_impl(abd_t *abd) { - mutex_destroy(&abd->abd_mtx); - ASSERT(!list_link_active(&abd->abd_gang_link)); kmem_cache_free(abd_cache, abd); ABDSTAT_INCR(abdstat_struct_size, -(int)sizeof (abd_t)); } @@ -472,14 +468,12 @@ abd_alloc_zero_scatter(void) ASSERT3U(table.nents, ==, nr_pages); abd_zero_scatter = abd_alloc_struct(SPA_MAXBLOCKSIZE); - abd_zero_scatter->abd_flags = ABD_FLAG_OWNER; + abd_zero_scatter->abd_flags |= ABD_FLAG_OWNER; ABD_SCATTER(abd_zero_scatter).abd_offset = 0; ABD_SCATTER(abd_zero_scatter).abd_sgl = table.sgl; ABD_SCATTER(abd_zero_scatter).abd_nents = nr_pages; abd_zero_scatter->abd_size = SPA_MAXBLOCKSIZE; - abd_zero_scatter->abd_parent = NULL; abd_zero_scatter->abd_flags |= ABD_FLAG_MULTI_CHUNK | ABD_FLAG_ZEROS; - zfs_refcount_create(&abd_zero_scatter->abd_children); abd_for_each_sg(abd_zero_scatter, sg, nr_pages, i) { sg_set_page(sg, abd_zero_page, PAGESIZE, 0); @@ -599,12 +593,11 @@ abd_alloc_zero_scatter(void) abd_zero_page = umem_alloc_aligned(PAGESIZE, 64, KM_SLEEP); memset(abd_zero_page, 0, PAGESIZE); abd_zero_scatter = abd_alloc_struct(SPA_MAXBLOCKSIZE); - abd_zero_scatter->abd_flags = ABD_FLAG_OWNER; + abd_zero_scatter->abd_flags |= ABD_FLAG_OWNER; abd_zero_scatter->abd_flags |= ABD_FLAG_MULTI_CHUNK | ABD_FLAG_ZEROS; ABD_SCATTER(abd_zero_scatter).abd_offset = 0; ABD_SCATTER(abd_zero_scatter).abd_nents = nr_pages; abd_zero_scatter->abd_size = SPA_MAXBLOCKSIZE; - abd_zero_scatter->abd_parent = NULL; zfs_refcount_create(&abd_zero_scatter->abd_children); ABD_SCATTER(abd_zero_scatter).abd_sgl = vmem_alloc(nr_pages * sizeof (struct scatterlist), KM_SLEEP); @@ -678,7 +671,6 @@ abd_verify_scatter(abd_t *abd) static void abd_free_zero_scatter(void) { - zfs_refcount_destroy(&abd_zero_scatter->abd_children); ABDSTAT_BUMPDOWN(abdstat_scatter_cnt); ABDSTAT_INCR(abdstat_scatter_data_size, -(int)PAGESIZE); ABDSTAT_BUMPDOWN(abdstat_scatter_page_multi_chunk); @@ -747,9 +739,7 @@ abd_free_linear_page(abd_t *abd) ABD_SCATTER(abd).abd_sgl = sg; abd_free_chunks(abd); - zfs_refcount_destroy(&abd->abd_children); abd_update_scatter_stats(abd, ABDSTAT_DECR); - abd_free_struct(abd); } /* @@ -770,9 +760,8 @@ abd_alloc_for_io(size_t size, boolean_t is_metadata) } abd_t * -abd_get_offset_scatter(abd_t *sabd, size_t off) +abd_get_offset_scatter(abd_t *abd, abd_t *sabd, size_t off) { - abd_t *abd = NULL; int i = 0; struct scatterlist *sg = NULL; @@ -781,14 +770,14 @@ abd_get_offset_scatter(abd_t *sabd, size_t off) size_t new_offset = ABD_SCATTER(sabd).abd_offset + off; - abd = abd_alloc_struct(0); + if (abd == NULL) + abd = abd_alloc_struct(0); /* * Even if this buf is filesystem metadata, we only track that * if we own the underlying data buffer, which is not true in * this case. Therefore, we don't ever use ABD_FLAG_META here. */ - abd->abd_flags = 0; abd_for_each_sg(sabd, sg, ABD_SCATTER(sabd).abd_nents, i) { if (new_offset < sg->length) @@ -936,17 +925,28 @@ abd_nr_pages_off(abd_t *abd, unsigned int size, size_t off) { unsigned long pos; - while (abd_is_gang(abd)) - abd = abd_gang_get_offset(abd, &off); + if (abd_is_gang(abd)) { + unsigned long count = 0; + + for (abd_t *cabd = abd_gang_get_offset(abd, &off); + cabd != NULL && size != 0; + cabd = list_next(&ABD_GANG(abd).abd_gang_chain, cabd)) { + ASSERT3U(off, <, cabd->abd_size); + int mysize = MIN(size, cabd->abd_size - off); + count += abd_nr_pages_off(cabd, mysize, off); + size -= mysize; + off = 0; + } + return (count); + } - ASSERT(!abd_is_gang(abd)); if (abd_is_linear(abd)) pos = (unsigned long)abd_to_buf(abd) + off; else pos = ABD_SCATTER(abd).abd_offset + off; - return ((pos + size + PAGESIZE - 1) >> PAGE_SHIFT) - - (pos >> PAGE_SHIFT); + return (((pos + size + PAGESIZE - 1) >> PAGE_SHIFT) - + (pos >> PAGE_SHIFT)); } static unsigned int @@ -1021,7 +1021,6 @@ unsigned int abd_bio_map_off(struct bio *bio, abd_t *abd, unsigned int io_size, size_t off) { - int i; struct abd_iter aiter; ASSERT3U(io_size, <=, abd->abd_size - off); @@ -1035,7 +1034,7 @@ abd_bio_map_off(struct bio *bio, abd_t *abd, abd_iter_init(&aiter, abd); abd_iter_advance(&aiter, off); - for (i = 0; i < bio->bi_max_vecs; i++) { + for (int i = 0; i < bio->bi_max_vecs; i++) { struct page *pg; size_t len, sgoff, pgoff; struct scatterlist *sg; diff --git a/sys/contrib/openzfs/module/os/linux/zfs/vdev_disk.c b/sys/contrib/openzfs/module/os/linux/zfs/vdev_disk.c index 4bd27d1b516f..b373f2c2e83c 100644 --- a/sys/contrib/openzfs/module/os/linux/zfs/vdev_disk.c +++ b/sys/contrib/openzfs/module/os/linux/zfs/vdev_disk.c @@ -350,19 +350,14 @@ vdev_disk_close(vdev_t *v) static dio_request_t * vdev_disk_dio_alloc(int bio_count) { - dio_request_t *dr; - int i; - - dr = kmem_zalloc(sizeof (dio_request_t) + + dio_request_t *dr = kmem_zalloc(sizeof (dio_request_t) + sizeof (struct bio *) * bio_count, KM_SLEEP); - if (dr) { - atomic_set(&dr->dr_ref, 0); - dr->dr_bio_count = bio_count; - dr->dr_error = 0; + atomic_set(&dr->dr_ref, 0); + dr->dr_bio_count = bio_count; + dr->dr_error = 0; - for (i = 0; i < dr->dr_bio_count; i++) - dr->dr_bio[i] = NULL; - } + for (int i = 0; i < dr->dr_bio_count; i++) + dr->dr_bio[i] = NULL; return (dr); } @@ -536,8 +531,9 @@ __vdev_disk_physio(struct block_device *bdev, zio_t *zio, dio_request_t *dr; uint64_t abd_offset; uint64_t bio_offset; - int bio_size, bio_count = 16; - int i = 0, error = 0; + int bio_size; + int bio_count = 16; + int error = 0; struct blk_plug plug; /* @@ -552,8 +548,6 @@ __vdev_disk_physio(struct block_device *bdev, zio_t *zio, retry: dr = vdev_disk_dio_alloc(bio_count); - if (dr == NULL) - return (SET_ERROR(ENOMEM)); if (zio && !(zio->io_flags & (ZIO_FLAG_IO_RETRY | ZIO_FLAG_TRYHARD))) bio_set_flags_failfast(bdev, &flags); @@ -561,26 +555,28 @@ retry: dr->dr_zio = zio; /* - * When the IO size exceeds the maximum bio size for the request - * queue we are forced to break the IO in multiple bio's and wait - * for them all to complete. Ideally, all pool users will set - * their volume block size to match the maximum request size and - * the common case will be one bio per vdev IO request. + * Since bio's can have up to BIO_MAX_PAGES=256 iovec's, each of which + * is at least 512 bytes and at most PAGESIZE (typically 4K), one bio + * can cover at least 128KB and at most 1MB. When the required number + * of iovec's exceeds this, we are forced to break the IO in multiple + * bio's and wait for them all to complete. This is likely if the + * recordsize property is increased beyond 1MB. The default + * bio_count=16 should typically accommodate the maximum-size zio of + * 16MB. */ abd_offset = 0; bio_offset = io_offset; - bio_size = io_size; - for (i = 0; i <= dr->dr_bio_count; i++) { + bio_size = io_size; + for (int i = 0; i <= dr->dr_bio_count; i++) { /* Finished constructing bio's for given buffer */ if (bio_size <= 0) break; /* - * By default only 'bio_count' bio's per dio are allowed. - * However, if we find ourselves in a situation where more - * are needed we allocate a larger dio and warn the user. + * If additional bio's are required, we have to retry, but + * this should be rare - see the comment above. */ if (dr->dr_bio_count == i) { vdev_disk_dio_free(dr); @@ -622,9 +618,10 @@ retry: blk_start_plug(&plug); /* Submit all bio's associated with this dio */ - for (i = 0; i < dr->dr_bio_count; i++) + for (int i = 0; i < dr->dr_bio_count; i++) { if (dr->dr_bio[i]) vdev_submit_bio(dr->dr_bio[i]); + } if (dr->dr_bio_count > 1) blk_finish_plug(&plug); diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zfs_uio.c b/sys/contrib/openzfs/module/os/linux/zfs/zfs_uio.c new file mode 100644 index 000000000000..3b0f824115f8 --- /dev/null +++ b/sys/contrib/openzfs/module/os/linux/zfs/zfs_uio.c @@ -0,0 +1,333 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ +/* All Rights Reserved */ + +/* + * University Copyright- Copyright (c) 1982, 1986, 1988 + * The Regents of the University of California + * All Rights Reserved + * + * University Acknowledgment- Portions of this document are derived from + * software developed by the University of California, Berkeley, and its + * contributors. + */ +/* + * Copyright (c) 2015 by Chunwei Chen. All rights reserved. + */ + +#ifdef _KERNEL + +#include <sys/types.h> +#include <sys/uio_impl.h> +#include <sys/sysmacros.h> +#include <sys/strings.h> +#include <linux/kmap_compat.h> +#include <linux/uaccess.h> + +/* + * Move "n" bytes at byte address "p"; "rw" indicates the direction + * of the move, and the I/O parameters are provided in "uio", which is + * update to reflect the data which was moved. Returns 0 on success or + * a non-zero errno on failure. + */ +static int +zfs_uiomove_iov(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio) +{ + const struct iovec *iov = uio->uio_iov; + size_t skip = uio->uio_skip; + ulong_t cnt; + + while (n && uio->uio_resid) { + cnt = MIN(iov->iov_len - skip, n); + switch (uio->uio_segflg) { + case UIO_USERSPACE: + /* + * p = kernel data pointer + * iov->iov_base = user data pointer + */ + if (rw == UIO_READ) { + if (copy_to_user(iov->iov_base+skip, p, cnt)) + return (EFAULT); + } else { + unsigned long b_left = 0; + if (uio->uio_fault_disable) { + if (!zfs_access_ok(VERIFY_READ, + (iov->iov_base + skip), cnt)) { + return (EFAULT); + } + pagefault_disable(); + b_left = + __copy_from_user_inatomic(p, + (iov->iov_base + skip), cnt); + pagefault_enable(); + } else { + b_left = + copy_from_user(p, + (iov->iov_base + skip), cnt); + } + if (b_left > 0) { + unsigned long c_bytes = + cnt - b_left; + uio->uio_skip += c_bytes; + ASSERT3U(uio->uio_skip, <, + iov->iov_len); + uio->uio_resid -= c_bytes; + uio->uio_loffset += c_bytes; + return (EFAULT); + } + } + break; + case UIO_SYSSPACE: + if (rw == UIO_READ) + bcopy(p, iov->iov_base + skip, cnt); + else + bcopy(iov->iov_base + skip, p, cnt); + break; + default: + ASSERT(0); + } + skip += cnt; + if (skip == iov->iov_len) { + skip = 0; + uio->uio_iov = (++iov); + uio->uio_iovcnt--; + } + uio->uio_skip = skip; + uio->uio_resid -= cnt; + uio->uio_loffset += cnt; + p = (caddr_t)p + cnt; + n -= cnt; + } + return (0); +} + +static int +zfs_uiomove_bvec(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio) +{ + const struct bio_vec *bv = uio->uio_bvec; + size_t skip = uio->uio_skip; + ulong_t cnt; + + while (n && uio->uio_resid) { + void *paddr; + cnt = MIN(bv->bv_len - skip, n); + + paddr = zfs_kmap_atomic(bv->bv_page, KM_USER1); + if (rw == UIO_READ) + bcopy(p, paddr + bv->bv_offset + skip, cnt); + else + bcopy(paddr + bv->bv_offset + skip, p, cnt); + zfs_kunmap_atomic(paddr, KM_USER1); + + skip += cnt; + if (skip == bv->bv_len) { + skip = 0; + uio->uio_bvec = (++bv); + uio->uio_iovcnt--; + } + uio->uio_skip = skip; + uio->uio_resid -= cnt; + uio->uio_loffset += cnt; + p = (caddr_t)p + cnt; + n -= cnt; + } + return (0); +} + +#if defined(HAVE_VFS_IOV_ITER) +static int +zfs_uiomove_iter(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio, + boolean_t revert) +{ + size_t cnt = MIN(n, uio->uio_resid); + + if (uio->uio_skip) + iov_iter_advance(uio->uio_iter, uio->uio_skip); + + if (rw == UIO_READ) + cnt = copy_to_iter(p, cnt, uio->uio_iter); + else + cnt = copy_from_iter(p, cnt, uio->uio_iter); + + /* + * When operating on a full pipe no bytes are processed. + * In which case return EFAULT which is converted to EAGAIN + * by the kernel's generic_file_splice_read() function. + */ + if (cnt == 0) + return (EFAULT); + + /* + * Revert advancing the uio_iter. This is set by zfs_uiocopy() + * to avoid consuming the uio and its iov_iter structure. + */ + if (revert) + iov_iter_revert(uio->uio_iter, cnt); + + uio->uio_resid -= cnt; + uio->uio_loffset += cnt; + + return (0); +} +#endif + +int +zfs_uiomove(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio) +{ + if (uio->uio_segflg == UIO_BVEC) + return (zfs_uiomove_bvec(p, n, rw, uio)); +#if defined(HAVE_VFS_IOV_ITER) + else if (uio->uio_segflg == UIO_ITER) + return (zfs_uiomove_iter(p, n, rw, uio, B_FALSE)); +#endif + else + return (zfs_uiomove_iov(p, n, rw, uio)); +} +EXPORT_SYMBOL(zfs_uiomove); + +/* + * Fault in the pages of the first n bytes specified by the uio structure. + * 1 byte in each page is touched and the uio struct is unmodified. Any + * error will terminate the process as this is only a best attempt to get + * the pages resident. + */ +int +zfs_uio_prefaultpages(ssize_t n, zfs_uio_t *uio) +{ + if (uio->uio_segflg == UIO_SYSSPACE || uio->uio_segflg == UIO_BVEC) { + /* There's never a need to fault in kernel pages */ + return (0); +#if defined(HAVE_VFS_IOV_ITER) + } else if (uio->uio_segflg == UIO_ITER) { + /* + * At least a Linux 4.9 kernel, iov_iter_fault_in_readable() + * can be relied on to fault in user pages when referenced. + */ + if (iov_iter_fault_in_readable(uio->uio_iter, n)) + return (EFAULT); +#endif + } else { + /* Fault in all user pages */ + ASSERT3S(uio->uio_segflg, ==, UIO_USERSPACE); + const struct iovec *iov = uio->uio_iov; + int iovcnt = uio->uio_iovcnt; + size_t skip = uio->uio_skip; + uint8_t tmp; + caddr_t p; + + for (; n > 0 && iovcnt > 0; iov++, iovcnt--, skip = 0) { + ulong_t cnt = MIN(iov->iov_len - skip, n); + /* empty iov */ + if (cnt == 0) + continue; + n -= cnt; + /* touch each page in this segment. */ + p = iov->iov_base + skip; + while (cnt) { + if (get_user(tmp, (uint8_t *)p)) + return (EFAULT); + ulong_t incr = MIN(cnt, PAGESIZE); + p += incr; + cnt -= incr; + } + /* touch the last byte in case it straddles a page. */ + p--; + if (get_user(tmp, (uint8_t *)p)) + return (EFAULT); + } + } + + if (iterp && iov_iter_fault_in_readable(iterp, n)) + return (EFAULT); +#endif + return (0); +} +EXPORT_SYMBOL(zfs_uio_prefaultpages); + +/* + * The same as zfs_uiomove() but doesn't modify uio structure. + * return in cbytes how many bytes were copied. + */ +int +zfs_uiocopy(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio, size_t *cbytes) +{ + zfs_uio_t uio_copy; + int ret; + + bcopy(uio, &uio_copy, sizeof (zfs_uio_t)); + + if (uio->uio_segflg == UIO_BVEC) + ret = zfs_uiomove_bvec(p, n, rw, &uio_copy); +#if defined(HAVE_VFS_IOV_ITER) + else if (uio->uio_segflg == UIO_ITER) + ret = zfs_uiomove_iter(p, n, rw, &uio_copy, B_TRUE); +#endif + else + ret = zfs_uiomove_iov(p, n, rw, &uio_copy); + + *cbytes = uio->uio_resid - uio_copy.uio_resid; + + return (ret); +} +EXPORT_SYMBOL(zfs_uiocopy); + +/* + * Drop the next n chars out of *uio. + */ +void +zfs_uioskip(zfs_uio_t *uio, size_t n) +{ + if (n > uio->uio_resid) + return; + + if (uio->uio_segflg == UIO_BVEC) { + uio->uio_skip += n; + while (uio->uio_iovcnt && + uio->uio_skip >= uio->uio_bvec->bv_len) { + uio->uio_skip -= uio->uio_bvec->bv_len; + uio->uio_bvec++; + uio->uio_iovcnt--; + } +#if defined(HAVE_VFS_IOV_ITER) + } else if (uio->uio_segflg == UIO_ITER) { + iov_iter_advance(uio->uio_iter, n); +#endif + } else { + uio->uio_skip += n; + while (uio->uio_iovcnt && + uio->uio_skip >= uio->uio_iov->iov_len) { + uio->uio_skip -= uio->uio_iov->iov_len; + uio->uio_iov++; + uio->uio_iovcnt--; + } + } + uio->uio_loffset += n; + uio->uio_resid -= n; +} +EXPORT_SYMBOL(zfs_uioskip); + +#endif /* _KERNEL */ diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zfs_vfsops.c b/sys/contrib/openzfs/module/os/linux/zfs/zfs_vfsops.c index 165c1218ae79..3cc4b560e477 100644 --- a/sys/contrib/openzfs/module/os/linux/zfs/zfs_vfsops.c +++ b/sys/contrib/openzfs/module/os/linux/zfs/zfs_vfsops.c @@ -1772,7 +1772,7 @@ zfs_vget(struct super_block *sb, struct inode **ipp, fid_t *fidp) *ipp = ZTOI(zp); if (*ipp) - zfs_inode_update(ITOZ(*ipp)); + zfs_znode_update_vfs(ITOZ(*ipp)); ZFS_EXIT(zfsvfs); return (0); diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zfs_vnops_os.c b/sys/contrib/openzfs/module/os/linux/zfs/zfs_vnops_os.c index 3be387a30e5c..84c33b541ea3 100644 --- a/sys/contrib/openzfs/module/os/linux/zfs/zfs_vnops_os.c +++ b/sys/contrib/openzfs/module/os/linux/zfs/zfs_vnops_os.c @@ -87,15 +87,18 @@ * must be checked with ZFS_VERIFY_ZP(zp). Both of these macros * can return EIO from the calling function. * - * (2) zrele() should always be the last thing except for zil_commit() - * (if necessary) and ZFS_EXIT(). This is for 3 reasons: - * First, if it's the last reference, the vnode/znode - * can be freed, so the zp may point to freed memory. Second, the last - * reference will call zfs_zinactive(), which may induce a lot of work -- - * pushing cached pages (which acquires range locks) and syncing out - * cached atime changes. Third, zfs_zinactive() may require a new tx, - * which could deadlock the system if you were already holding one. - * If you must call zrele() within a tx then use zfs_zrele_async(). + * (2) zrele() should always be the last thing except for zil_commit() (if + * necessary) and ZFS_EXIT(). This is for 3 reasons: First, if it's the + * last reference, the vnode/znode can be freed, so the zp may point to + * freed memory. Second, the last reference will call zfs_zinactive(), + * which may induce a lot of work -- pushing cached pages (which acquires + * range locks) and syncing out cached atime changes. Third, + * zfs_zinactive() may require a new tx, which could deadlock the system + * if you were already holding one. This deadlock occurs because the tx + * currently being operated on prevents a txg from syncing, which + * prevents the new tx from progressing, resulting in a deadlock. If you + * must call zrele() within a tx, use zfs_zrele_async(). Note that iput() + * is a synonym for zrele(). * * (3) All range locks must be grabbed before calling dmu_tx_assign(), * as they can span dmu_tx_assign() calls. @@ -298,7 +301,7 @@ update_pages(znode_t *zp, int64_t start, int len, objset_t *os) * the file is memory mapped. */ int -mappedread(znode_t *zp, int nbytes, uio_t *uio) +mappedread(znode_t *zp, int nbytes, zfs_uio_t *uio) { struct inode *ip = ZTOI(zp); struct address_space *mp = ip->i_mapping; @@ -320,7 +323,7 @@ mappedread(znode_t *zp, int nbytes, uio_t *uio) unlock_page(pp); pb = kmap(pp); - error = uiomove(pb + off, bytes, UIO_READ, uio); + error = zfs_uiomove(pb + off, bytes, UIO_READ, uio); kunmap(pp); if (mapping_writably_mapped(mp)) @@ -372,8 +375,8 @@ zfs_write_simple(znode_t *zp, const void *data, size_t len, iov.iov_base = (void *)data; iov.iov_len = len; - uio_t uio; - uio_iovec_init(&uio, &iov, 1, pos, UIO_SYSSPACE, len, 0); + zfs_uio_t uio; + zfs_uio_iovec_init(&uio, &iov, 1, pos, UIO_SYSSPACE, len, 0); cookie = spl_fstrans_mark(); error = zfs_write(zp, &uio, 0, kcred); @@ -381,8 +384,8 @@ zfs_write_simple(znode_t *zp, const void *data, size_t len, if (error == 0) { if (residp != NULL) - *residp = uio_resid(&uio); - else if (uio_resid(&uio) != 0) + *residp = zfs_uio_resid(&uio); + else if (zfs_uio_resid(&uio) != 0) error = SET_ERROR(EIO); } @@ -398,11 +401,18 @@ zfs_zrele_async(znode_t *zp) ASSERT(atomic_read(&ip->i_count) > 0); ASSERT(os != NULL); - if (atomic_read(&ip->i_count) == 1) + /* + * If decrementing the count would put us at 0, we can't do it inline + * here, because that would be synchronous. Instead, dispatch an iput + * to run later. + * + * For more information on the dangers of a synchronous iput, see the + * header comment of this file. + */ + if (!atomic_add_unless(&ip->i_count, -1, 1)) { VERIFY(taskq_dispatch(dsl_pool_zrele_taskq(dmu_objset_pool(os)), (task_func_t *)iput, ip, TQ_SLEEP) != TASKQID_INVALID); - else - zrele(zp); + } } @@ -516,7 +526,7 @@ zfs_lookup(znode_t *zdp, char *nm, znode_t **zpp, int flags, cred_t *cr, error = zfs_dirlook(zdp, nm, zpp, flags, direntflags, realpnp); if ((error == 0) && (*zpp)) - zfs_inode_update(*zpp); + zfs_znode_update_vfs(*zpp); ZFS_EXIT(zfsvfs); return (error); @@ -779,8 +789,8 @@ out: if (zp) zrele(zp); } else { - zfs_inode_update(dzp); - zfs_inode_update(zp); + zfs_znode_update_vfs(dzp); + zfs_znode_update_vfs(zp); *zpp = zp; } @@ -902,8 +912,8 @@ out: if (zp) zrele(zp); } else { - zfs_inode_update(dzp); - zfs_inode_update(zp); + zfs_znode_update_vfs(dzp); + zfs_znode_update_vfs(zp); *ipp = ZTOI(zp); } @@ -1129,8 +1139,8 @@ out: pn_free(realnmp); zfs_dirent_unlock(dl); - zfs_inode_update(dzp); - zfs_inode_update(zp); + zfs_znode_update_vfs(dzp); + zfs_znode_update_vfs(zp); if (delete_now) zrele(zp); @@ -1138,7 +1148,7 @@ out: zfs_zrele_async(zp); if (xzp) { - zfs_inode_update(xzp); + zfs_znode_update_vfs(xzp); zfs_zrele_async(xzp); } @@ -1335,8 +1345,8 @@ out: if (error != 0) { zrele(zp); } else { - zfs_inode_update(dzp); - zfs_inode_update(zp); + zfs_znode_update_vfs(dzp); + zfs_znode_update_vfs(zp); } ZFS_EXIT(zfsvfs); return (error); @@ -1461,8 +1471,8 @@ top: out: zfs_dirent_unlock(dl); - zfs_inode_update(dzp); - zfs_inode_update(zp); + zfs_znode_update_vfs(dzp); + zfs_znode_update_vfs(zp); zrele(zp); if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) @@ -2532,7 +2542,7 @@ out: err2 = zfs_setattr_dir(attrzp); zrele(attrzp); } - zfs_inode_update(zp); + zfs_znode_update_vfs(zp); } out2: @@ -2990,17 +3000,17 @@ out: zfs_dirent_unlock(sdl); zfs_dirent_unlock(tdl); - zfs_inode_update(sdzp); + zfs_znode_update_vfs(sdzp); if (sdzp == tdzp) rw_exit(&sdzp->z_name_lock); if (sdzp != tdzp) - zfs_inode_update(tdzp); + zfs_znode_update_vfs(tdzp); - zfs_inode_update(szp); + zfs_znode_update_vfs(szp); zrele(szp); if (tzp) { - zfs_inode_update(tzp); + zfs_znode_update_vfs(tzp); zrele(tzp); } @@ -3159,8 +3169,8 @@ top: txtype |= TX_CI; zfs_log_symlink(zilog, tx, txtype, dzp, zp, name, link); - zfs_inode_update(dzp); - zfs_inode_update(zp); + zfs_znode_update_vfs(dzp); + zfs_znode_update_vfs(zp); } zfs_acl_ids_free(&acl_ids); @@ -3198,7 +3208,7 @@ top: */ /* ARGSUSED */ int -zfs_readlink(struct inode *ip, uio_t *uio, cred_t *cr) +zfs_readlink(struct inode *ip, zfs_uio_t *uio, cred_t *cr) { znode_t *zp = ITOZ(ip); zfsvfs_t *zfsvfs = ITOZSB(ip); @@ -3409,8 +3419,8 @@ top: if (is_tmpfile && zfsvfs->z_os->os_sync != ZFS_SYNC_DISABLED) txg_wait_synced(dmu_objset_pool(zfsvfs->z_os), txg); - zfs_inode_update(tdzp); - zfs_inode_update(szp); + zfs_znode_update_vfs(tdzp); + zfs_znode_update_vfs(szp); ZFS_EXIT(zfsvfs); return (error); } diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zfs_znode.c b/sys/contrib/openzfs/module/os/linux/zfs/zfs_znode.c index b33594488ee0..d59c1bb0716a 100644 --- a/sys/contrib/openzfs/module/os/linux/zfs/zfs_znode.c +++ b/sys/contrib/openzfs/module/os/linux/zfs/zfs_znode.c @@ -479,14 +479,10 @@ zfs_set_inode_flags(znode_t *zp, struct inode *ip) } /* - * Update the embedded inode given the znode. We should work toward - * eliminating this function as soon as possible by removing values - * which are duplicated between the znode and inode. If the generic - * inode has the correct field it should be used, and the ZFS code - * updated to access the inode. This can be done incrementally. + * Update the embedded inode given the znode. */ void -zfs_inode_update(znode_t *zp) +zfs_znode_update_vfs(znode_t *zp) { zfsvfs_t *zfsvfs; struct inode *ip; @@ -602,7 +598,7 @@ zfs_znode_alloc(zfsvfs_t *zfsvfs, dmu_buf_t *db, int blksz, ZFS_TIME_DECODE(&ip->i_ctime, ctime); ip->i_ino = zp->z_id; - zfs_inode_update(zp); + zfs_znode_update_vfs(zp); zfs_inode_set_ops(zfsvfs, ip); /* @@ -1278,7 +1274,7 @@ zfs_rezget(znode_t *zp) zp->z_blksz = doi.doi_data_block_size; zp->z_atime_dirty = B_FALSE; - zfs_inode_update(zp); + zfs_znode_update_vfs(zp); /* * If the file has zero links, then it has been unlinked on the send @@ -1796,7 +1792,7 @@ log: dmu_tx_commit(tx); - zfs_inode_update(zp); + zfs_znode_update_vfs(zp); error = 0; out: @@ -2127,8 +2123,10 @@ zfs_obj_to_path_impl(objset_t *osp, uint64_t obj, sa_handle_t *hdl, size_t complen; int is_xattrdir = 0; - if (prevdb) + if (prevdb) { + ASSERT(prevhdl != NULL); zfs_release_sa_handle(prevhdl, prevdb, FTAG); + } if ((error = zfs_obj_to_pobj(osp, sa_hdl, sa_table, &pobj, &is_xattrdir)) != 0) diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zio_crypt.c b/sys/contrib/openzfs/module/os/linux/zfs/zio_crypt.c index 8106359e1c77..284ca706ede5 100644 --- a/sys/contrib/openzfs/module/os/linux/zfs/zio_crypt.c +++ b/sys/contrib/openzfs/module/os/linux/zfs/zio_crypt.c @@ -376,7 +376,7 @@ error: static int zio_do_crypt_uio(boolean_t encrypt, uint64_t crypt, crypto_key_t *key, crypto_ctx_template_t tmpl, uint8_t *ivbuf, uint_t datalen, - uio_t *puio, uio_t *cuio, uint8_t *authbuf, uint_t auth_len) + zfs_uio_t *puio, zfs_uio_t *cuio, uint8_t *authbuf, uint_t auth_len) { int ret; crypto_data_t plaindata, cipherdata; @@ -479,7 +479,7 @@ zio_crypt_key_wrap(crypto_key_t *cwkey, zio_crypt_key_t *key, uint8_t *iv, uint8_t *mac, uint8_t *keydata_out, uint8_t *hmac_keydata_out) { int ret; - uio_t puio, cuio; + zfs_uio_t puio, cuio; uint64_t aad[3]; iovec_t plain_iovecs[2], cipher_iovecs[3]; uint64_t crypt = key->zk_crypt; @@ -495,7 +495,7 @@ zio_crypt_key_wrap(crypto_key_t *cwkey, zio_crypt_key_t *key, uint8_t *iv, if (ret != 0) goto error; - /* initialize uio_ts */ + /* initialize zfs_uio_ts */ plain_iovecs[0].iov_base = key->zk_master_keydata; plain_iovecs[0].iov_len = keydata_len; plain_iovecs[1].iov_base = key->zk_hmac_keydata; @@ -550,7 +550,7 @@ zio_crypt_key_unwrap(crypto_key_t *cwkey, uint64_t crypt, uint64_t version, uint8_t *mac, zio_crypt_key_t *key) { crypto_mechanism_t mech; - uio_t puio, cuio; + zfs_uio_t puio, cuio; uint64_t aad[3]; iovec_t plain_iovecs[2], cipher_iovecs[3]; uint_t enc_len, keydata_len, aad_len; @@ -563,7 +563,7 @@ zio_crypt_key_unwrap(crypto_key_t *cwkey, uint64_t crypt, uint64_t version, keydata_len = zio_crypt_table[crypt].ci_keylen; - /* initialize uio_ts */ + /* initialize zfs_uio_ts */ plain_iovecs[0].iov_base = key->zk_master_keydata; plain_iovecs[0].iov_len = keydata_len; plain_iovecs[1].iov_base = key->zk_hmac_keydata; @@ -1296,7 +1296,7 @@ error: } static void -zio_crypt_destroy_uio(uio_t *uio) +zio_crypt_destroy_uio(zfs_uio_t *uio) { if (uio->uio_iov) kmem_free(uio->uio_iov, uio->uio_iovcnt * sizeof (iovec_t)); @@ -1386,8 +1386,8 @@ zio_crypt_do_indirect_mac_checksum_abd(boolean_t generate, abd_t *abd, */ static int zio_crypt_init_uios_zil(boolean_t encrypt, uint8_t *plainbuf, - uint8_t *cipherbuf, uint_t datalen, boolean_t byteswap, uio_t *puio, - uio_t *cuio, uint_t *enc_len, uint8_t **authbuf, uint_t *auth_len, + uint8_t *cipherbuf, uint_t datalen, boolean_t byteswap, zfs_uio_t *puio, + zfs_uio_t *cuio, uint_t *enc_len, uint8_t **authbuf, uint_t *auth_len, boolean_t *no_crypt) { int ret; @@ -1581,7 +1581,7 @@ error: static int zio_crypt_init_uios_dnode(boolean_t encrypt, uint64_t version, uint8_t *plainbuf, uint8_t *cipherbuf, uint_t datalen, boolean_t byteswap, - uio_t *puio, uio_t *cuio, uint_t *enc_len, uint8_t **authbuf, + zfs_uio_t *puio, zfs_uio_t *cuio, uint_t *enc_len, uint8_t **authbuf, uint_t *auth_len, boolean_t *no_crypt) { int ret; @@ -1764,7 +1764,7 @@ error: static int zio_crypt_init_uios_normal(boolean_t encrypt, uint8_t *plainbuf, - uint8_t *cipherbuf, uint_t datalen, uio_t *puio, uio_t *cuio, + uint8_t *cipherbuf, uint_t datalen, zfs_uio_t *puio, zfs_uio_t *cuio, uint_t *enc_len) { int ret; @@ -1824,8 +1824,8 @@ error: static int zio_crypt_init_uios(boolean_t encrypt, uint64_t version, dmu_object_type_t ot, uint8_t *plainbuf, uint8_t *cipherbuf, uint_t datalen, boolean_t byteswap, - uint8_t *mac, uio_t *puio, uio_t *cuio, uint_t *enc_len, uint8_t **authbuf, - uint_t *auth_len, boolean_t *no_crypt) + uint8_t *mac, zfs_uio_t *puio, zfs_uio_t *cuio, uint_t *enc_len, + uint8_t **authbuf, uint_t *auth_len, boolean_t *no_crypt) { int ret; iovec_t *mac_iov; @@ -1884,7 +1884,7 @@ zio_do_crypt_data(boolean_t encrypt, zio_crypt_key_t *key, uint64_t crypt = key->zk_crypt; uint_t keydata_len = zio_crypt_table[crypt].ci_keylen; uint_t enc_len, auth_len; - uio_t puio, cuio; + zfs_uio_t puio, cuio; uint8_t enc_keydata[MASTER_KEY_MAX_LEN]; crypto_key_t tmp_ckey, *ckey = NULL; crypto_ctx_template_t tmpl; @@ -1950,8 +1950,8 @@ zio_do_crypt_data(boolean_t encrypt, zio_crypt_key_t *key, /* If the hardware implementation fails fall back to software */ } - bzero(&puio, sizeof (uio_t)); - bzero(&cuio, sizeof (uio_t)); + bzero(&puio, sizeof (zfs_uio_t)); + bzero(&cuio, sizeof (zfs_uio_t)); /* create uios for encryption */ ret = zio_crypt_init_uios(encrypt, key->zk_version, ot, plainbuf, diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zpl_file.c b/sys/contrib/openzfs/module/os/linux/zfs/zpl_file.c index 9e08c94e2147..970db4a8b73a 100644 --- a/sys/contrib/openzfs/module/os/linux/zfs/zpl_file.c +++ b/sys/contrib/openzfs/module/os/linux/zfs/zpl_file.c @@ -242,13 +242,13 @@ zpl_file_accessed(struct file *filp) * Otherwise, for older kernels extract the iovec and pass it instead. */ static void -zpl_uio_init(uio_t *uio, struct kiocb *kiocb, struct iov_iter *to, +zpl_uio_init(zfs_uio_t *uio, struct kiocb *kiocb, struct iov_iter *to, loff_t pos, ssize_t count, size_t skip) { #if defined(HAVE_VFS_IOV_ITER) - uio_iov_iter_init(uio, to, pos, count, skip); + zfs_uio_iov_iter_init(uio, to, pos, count, skip); #else - uio_iovec_init(uio, to->iov, to->nr_segs, pos, + zfs_uio_iovec_init(uio, to->iov, to->nr_segs, pos, to->type & ITER_KVEC ? UIO_SYSSPACE : UIO_USERSPACE, count, skip); #endif @@ -261,7 +261,7 @@ zpl_iter_read(struct kiocb *kiocb, struct iov_iter *to) fstrans_cookie_t cookie; struct file *filp = kiocb->ki_filp; ssize_t count = iov_iter_count(to); - uio_t uio; + zfs_uio_t uio; zpl_uio_init(&uio, kiocb, to, kiocb->ki_pos, count, 0); @@ -317,7 +317,7 @@ zpl_iter_write(struct kiocb *kiocb, struct iov_iter *from) fstrans_cookie_t cookie; struct file *filp = kiocb->ki_filp; struct inode *ip = filp->f_mapping->host; - uio_t uio; + zfs_uio_t uio; size_t count = 0; ssize_t ret; @@ -364,8 +364,8 @@ zpl_aio_read(struct kiocb *kiocb, const struct iovec *iov, if (ret) return (ret); - uio_t uio; - uio_iovec_init(&uio, iov, nr_segs, kiocb->ki_pos, UIO_USERSPACE, + zfs_uio_t uio; + zfs_uio_iovec_init(&uio, iov, nr_segs, kiocb->ki_pos, UIO_USERSPACE, count, 0); crhold(cr); @@ -407,8 +407,8 @@ zpl_aio_write(struct kiocb *kiocb, const struct iovec *iov, if (ret) return (ret); - uio_t uio; - uio_iovec_init(&uio, iov, nr_segs, kiocb->ki_pos, UIO_USERSPACE, + zfs_uio_t uio; + zfs_uio_iovec_init(&uio, iov, nr_segs, kiocb->ki_pos, UIO_USERSPACE, count, 0); crhold(cr); diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zpl_inode.c b/sys/contrib/openzfs/module/os/linux/zfs/zpl_inode.c index f336fbb1272b..e79d334edc9b 100644 --- a/sys/contrib/openzfs/module/os/linux/zfs/zpl_inode.c +++ b/sys/contrib/openzfs/module/os/linux/zfs/zpl_inode.c @@ -499,8 +499,8 @@ zpl_get_link_common(struct dentry *dentry, struct inode *ip, char **link) iov.iov_len = MAXPATHLEN; iov.iov_base = kmem_zalloc(MAXPATHLEN, KM_SLEEP); - uio_t uio; - uio_iovec_init(&uio, &iov, 1, 0, UIO_SYSSPACE, MAXPATHLEN - 1, 0); + zfs_uio_t uio; + zfs_uio_iovec_init(&uio, &iov, 1, 0, UIO_SYSSPACE, MAXPATHLEN - 1, 0); cookie = spl_fstrans_mark(); error = -zfs_readlink(ip, &uio, cr); diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zpl_xattr.c b/sys/contrib/openzfs/module/os/linux/zfs/zpl_xattr.c index 1ec3dae2bb81..83812f2dcba8 100644 --- a/sys/contrib/openzfs/module/os/linux/zfs/zpl_xattr.c +++ b/sys/contrib/openzfs/module/os/linux/zfs/zpl_xattr.c @@ -306,15 +306,15 @@ zpl_xattr_get_dir(struct inode *ip, const char *name, void *value, iov.iov_base = (void *)value; iov.iov_len = size; - uio_t uio; - uio_iovec_init(&uio, &iov, 1, 0, UIO_SYSSPACE, size, 0); + zfs_uio_t uio; + zfs_uio_iovec_init(&uio, &iov, 1, 0, UIO_SYSSPACE, size, 0); cookie = spl_fstrans_mark(); error = -zfs_read(ITOZ(xip), &uio, 0, cr); spl_fstrans_unmark(cookie); if (error == 0) - error = size - uio_resid(&uio); + error = size - zfs_uio_resid(&uio); out: if (xzp) zrele(xzp); diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zvol_os.c b/sys/contrib/openzfs/module/os/linux/zfs/zvol_os.c index cdc2076702af..0caf31307718 100644 --- a/sys/contrib/openzfs/module/os/linux/zfs/zvol_os.c +++ b/sys/contrib/openzfs/module/os/linux/zfs/zvol_os.c @@ -85,9 +85,9 @@ zvol_write(void *arg) zv_request_t *zvr = arg; struct bio *bio = zvr->bio; int error = 0; - uio_t uio; + zfs_uio_t uio; - uio_bvec_init(&uio, bio); + zfs_uio_bvec_init(&uio, bio); zvol_state_t *zv = zvr->zv; ASSERT3P(zv, !=, NULL); @@ -247,9 +247,9 @@ zvol_read(void *arg) zv_request_t *zvr = arg; struct bio *bio = zvr->bio; int error = 0; - uio_t uio; + zfs_uio_t uio; - uio_bvec_init(&uio, bio); + zfs_uio_bvec_init(&uio, bio); zvol_state_t *zv = zvr->zv; ASSERT3P(zv, !=, NULL); |