aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Rob Norris <rob.norris@klarasystems.com> 2023-06-27 13:44:53 +0000
committer Brian Behlendorf <behlendorf1@llnl.gov> 2023-07-24 23:36:38 +0000
commit 5a35c68b67473a7ae0a75c4beb51c928d3e2628d (patch)
tree 3caf2148cf291c0619b2f7516507cc3358d90ac0
parent f6facd242937e52ab1ad5a7fd3b6bbbb6ce08050 (diff)
download src-5a35c68b67473a7ae0a75c4beb51c928d3e2628d.tar.gz
src-5a35c68b67473a7ae0a75c4beb51c928d3e2628d.zip
-rw-r--r-- config/kernel-vfs-file_range.m4 164
-rw-r--r-- config/kernel.m4 10
-rw-r--r-- include/os/linux/zfs/sys/zpl.h 14
-rw-r--r-- module/Kbuild.in 1
-rw-r--r-- module/os/linux/zfs/zpl_file.c 13
-rw-r--r-- module/os/linux/zfs/zpl_file_range.c 183
6 files changed, 384 insertions, 1 deletions
diff --git a/config/kernel-vfs-file_range.m4 b/config/kernel-vfs-file_range.m4
new file mode 100644
index 000000000000..cc96404d8bbe
--- /dev/null
+++ b/config/kernel-vfs-file_range.m4
@@ -0,0 +1,164 @@
+dnl #
+dnl # The *_file_range APIs have a long history:
+dnl #
+dnl # 2.6.29: BTRFS_IOC_CLONE and BTRFS_IOC_CLONE_RANGE ioctl introduced
+dnl # 3.12: BTRFS_IOC_FILE_EXTENT_SAME ioctl introduced
+dnl #
+dnl # 4.5: copy_file_range() syscall introduced, added to VFS
+dnl # 4.5: BTRFS_IOC_CLONE and BTRFS_IOC_CLONE_RANGE renamed to FICLONE and
+dnl # FICLONERANGE, added to VFS as clone_file_range()
+dnl # 4.5: BTRFS_IOC_FILE_EXTENT_SAME renamed to FIDEDUPERANGE, added to VFS
+dnl # as dedupe_file_range()
+dnl #
+dnl # 4.20: VFS clone_file_range() and dedupe_file_range() replaced by
+dnl # remap_file_range()
+dnl #
+dnl # 5.3: VFS copy_file_range() expected to do its own fallback,
+dnl # generic_copy_file_range() added to support it
+dnl #
+dnl #
+dnl # Registers the compile test "vfs_copy_file_range": a struct
+dnl # file_operations initializer whose .copy_file_range member is set to a
+dnl # handler taking (src file, src offset, dst file, dst offset, size_t len,
+dnl # unsigned int flags) and returning ssize_t.
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_SRC_VFS_COPY_FILE_RANGE], [
+ ZFS_LINUX_TEST_SRC([vfs_copy_file_range], [
+ #include <linux/fs.h>
+
+ static ssize_t test_copy_file_range(struct file *src_file,
+ loff_t src_off, struct file *dst_file, loff_t dst_off,
+ size_t len, unsigned int flags) {
+ (void) src_file; (void) src_off;
+ (void) dst_file; (void) dst_off;
+ (void) len; (void) flags;
+ return (0);
+ }
+
+ static const struct file_operations
+ fops __attribute__ ((unused)) = {
+ .copy_file_range = test_copy_file_range,
+ };
+ ],[])
+])
+dnl #
+dnl # Reports the outcome of the "vfs_copy_file_range" test. The branch that
+dnl # prints "yes" also defines HAVE_VFS_COPY_FILE_RANGE; the other branch
+dnl # only prints "no".
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_VFS_COPY_FILE_RANGE], [
+ AC_MSG_CHECKING([whether fops->copy_file_range() is available])
+ ZFS_LINUX_TEST_RESULT([vfs_copy_file_range], [
+ AC_MSG_RESULT([yes])
+ AC_DEFINE(HAVE_VFS_COPY_FILE_RANGE, 1,
+ [fops->copy_file_range() is available])
+ ],[
+ AC_MSG_RESULT([no])
+ ])
+])
+
+dnl #
+dnl # Registers the compile test "generic_copy_file_range": a call to
+dnl # generic_copy_file_range() passing (src file, src offset, dst file,
+dnl # dst offset, size_t len, unsigned int flags).
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_SRC_VFS_GENERIC_COPY_FILE_RANGE], [
+ ZFS_LINUX_TEST_SRC([generic_copy_file_range], [
+ #include <linux/fs.h>
+ ], [
+ struct file *src_file __attribute__ ((unused)) = NULL;
+ loff_t src_off __attribute__ ((unused)) = 0;
+ struct file *dst_file __attribute__ ((unused)) = NULL;
+ loff_t dst_off __attribute__ ((unused)) = 0;
+ size_t len __attribute__ ((unused)) = 0;
+ unsigned int flags __attribute__ ((unused)) = 0;
+ generic_copy_file_range(src_file, src_off, dst_file, dst_off,
+ len, flags);
+ ])
+])
+dnl #
+dnl # Reports the outcome of the "generic_copy_file_range" test through
+dnl # ZFS_LINUX_TEST_RESULT_SYMBOL, naming the symbol generic_copy_file_range
+dnl # and the file fs/read_write.c. The branch that prints "yes" also defines
+dnl # HAVE_VFS_GENERIC_COPY_FILE_RANGE.
+dnl #
+dnl # NOTE(review): presumably the symbol/file arguments make the check also
+dnl # require that the symbol is exported - confirm against the definition
+dnl # of ZFS_LINUX_TEST_RESULT_SYMBOL.
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_VFS_GENERIC_COPY_FILE_RANGE], [
+ AC_MSG_CHECKING([whether generic_copy_file_range() is available])
+ ZFS_LINUX_TEST_RESULT_SYMBOL([generic_copy_file_range],
+ [generic_copy_file_range], [fs/read_write.c], [
+ AC_MSG_RESULT(yes)
+ AC_DEFINE(HAVE_VFS_GENERIC_COPY_FILE_RANGE, 1,
+ [generic_copy_file_range() is available])
+ ],[
+ AC_MSG_RESULT(no)
+ ])
+])
+
+dnl #
+dnl # Registers the compile test "vfs_clone_file_range": a struct
+dnl # file_operations initializer whose .clone_file_range member is set to a
+dnl # handler taking (src file, src offset, dst file, dst offset, u64 len)
+dnl # and returning int.
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_SRC_VFS_CLONE_FILE_RANGE], [
+ ZFS_LINUX_TEST_SRC([vfs_clone_file_range], [
+ #include <linux/fs.h>
+
+ static int test_clone_file_range(struct file *src_file,
+ loff_t src_off, struct file *dst_file, loff_t dst_off,
+ u64 len) {
+ (void) src_file; (void) src_off;
+ (void) dst_file; (void) dst_off;
+ (void) len;
+ return (0);
+ }
+
+ static const struct file_operations
+ fops __attribute__ ((unused)) = {
+ .clone_file_range = test_clone_file_range,
+ };
+ ],[])
+])
+dnl #
+dnl # Reports the outcome of the "vfs_clone_file_range" test. The branch that
+dnl # prints "yes" also defines HAVE_VFS_CLONE_FILE_RANGE; the other branch
+dnl # only prints "no".
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_VFS_CLONE_FILE_RANGE], [
+ AC_MSG_CHECKING([whether fops->clone_file_range() is available])
+ ZFS_LINUX_TEST_RESULT([vfs_clone_file_range], [
+ AC_MSG_RESULT([yes])
+ AC_DEFINE(HAVE_VFS_CLONE_FILE_RANGE, 1,
+ [fops->clone_file_range() is available])
+ ],[
+ AC_MSG_RESULT([no])
+ ])
+])
+
+dnl #
+dnl # Registers the compile test "vfs_dedupe_file_range": a struct
+dnl # file_operations initializer whose .dedupe_file_range member is set to a
+dnl # handler taking (src file, src offset, dst file, dst offset, u64 len)
+dnl # and returning int.
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_SRC_VFS_DEDUPE_FILE_RANGE], [
+ ZFS_LINUX_TEST_SRC([vfs_dedupe_file_range], [
+ #include <linux/fs.h>
+
+ static int test_dedupe_file_range(struct file *src_file,
+ loff_t src_off, struct file *dst_file, loff_t dst_off,
+ u64 len) {
+ (void) src_file; (void) src_off;
+ (void) dst_file; (void) dst_off;
+ (void) len;
+ return (0);
+ }
+
+ static const struct file_operations
+ fops __attribute__ ((unused)) = {
+ .dedupe_file_range = test_dedupe_file_range,
+ };
+ ],[])
+])
+dnl #
+dnl # Reports the outcome of the "vfs_dedupe_file_range" test. The branch that
+dnl # prints "yes" also defines HAVE_VFS_DEDUPE_FILE_RANGE; the other branch
+dnl # only prints "no".
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_VFS_DEDUPE_FILE_RANGE], [
+ AC_MSG_CHECKING([whether fops->dedupe_file_range() is available])
+ ZFS_LINUX_TEST_RESULT([vfs_dedupe_file_range], [
+ AC_MSG_RESULT([yes])
+ AC_DEFINE(HAVE_VFS_DEDUPE_FILE_RANGE, 1,
+ [fops->dedupe_file_range() is available])
+ ],[
+ AC_MSG_RESULT([no])
+ ])
+])
+
+dnl #
+dnl # Registers the compile test "vfs_remap_file_range": a struct
+dnl # file_operations initializer whose .remap_file_range member is set to a
+dnl # handler taking (src file, src offset, dst file, dst offset, loff_t len,
+dnl # unsigned int flags) and returning loff_t.
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_SRC_VFS_REMAP_FILE_RANGE], [
+ ZFS_LINUX_TEST_SRC([vfs_remap_file_range], [
+ #include <linux/fs.h>
+
+ static loff_t test_remap_file_range(struct file *src_file,
+ loff_t src_off, struct file *dst_file, loff_t dst_off,
+ loff_t len, unsigned int flags) {
+ (void) src_file; (void) src_off;
+ (void) dst_file; (void) dst_off;
+ (void) len; (void) flags;
+ return (0);
+ }
+
+ static const struct file_operations
+ fops __attribute__ ((unused)) = {
+ .remap_file_range = test_remap_file_range,
+ };
+ ],[])
+])
+
+dnl #
+dnl # Reports the outcome of the "vfs_remap_file_range" test. The branch that
+dnl # prints "yes" also defines HAVE_VFS_REMAP_FILE_RANGE; the other branch
+dnl # only prints "no".
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_VFS_REMAP_FILE_RANGE], [
+ AC_MSG_CHECKING([whether fops->remap_file_range() is available])
+ ZFS_LINUX_TEST_RESULT([vfs_remap_file_range], [
+ AC_MSG_RESULT([yes])
+ AC_DEFINE(HAVE_VFS_REMAP_FILE_RANGE, 1,
+ [fops->remap_file_range() is available])
+ ],[
+ AC_MSG_RESULT([no])
+ ])
+])
diff --git a/config/kernel.m4 b/config/kernel.m4
index cb7e736c9a43..b17ccfdeec92 100644
--- a/config/kernel.m4
+++ b/config/kernel.m4
@@ -116,6 +116,11 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_SRC], [
ZFS_AC_KERNEL_SRC_VFS_RW_ITERATE
ZFS_AC_KERNEL_SRC_VFS_GENERIC_WRITE_CHECKS
ZFS_AC_KERNEL_SRC_VFS_IOV_ITER
+ ZFS_AC_KERNEL_SRC_VFS_COPY_FILE_RANGE
+ ZFS_AC_KERNEL_SRC_VFS_GENERIC_COPY_FILE_RANGE
+ ZFS_AC_KERNEL_SRC_VFS_REMAP_FILE_RANGE
+ ZFS_AC_KERNEL_SRC_VFS_CLONE_FILE_RANGE
+ ZFS_AC_KERNEL_SRC_VFS_DEDUPE_FILE_RANGE
ZFS_AC_KERNEL_SRC_KMAP_ATOMIC_ARGS
ZFS_AC_KERNEL_SRC_FOLLOW_DOWN_ONE
ZFS_AC_KERNEL_SRC_MAKE_REQUEST_FN
@@ -249,6 +254,11 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_RESULT], [
ZFS_AC_KERNEL_VFS_RW_ITERATE
ZFS_AC_KERNEL_VFS_GENERIC_WRITE_CHECKS
ZFS_AC_KERNEL_VFS_IOV_ITER
+ ZFS_AC_KERNEL_VFS_COPY_FILE_RANGE
+ ZFS_AC_KERNEL_VFS_GENERIC_COPY_FILE_RANGE
+ ZFS_AC_KERNEL_VFS_REMAP_FILE_RANGE
+ ZFS_AC_KERNEL_VFS_CLONE_FILE_RANGE
+ ZFS_AC_KERNEL_VFS_DEDUPE_FILE_RANGE
ZFS_AC_KERNEL_KMAP_ATOMIC_ARGS
ZFS_AC_KERNEL_FOLLOW_DOWN_ONE
ZFS_AC_KERNEL_MAKE_REQUEST_FN
diff --git a/include/os/linux/zfs/sys/zpl.h b/include/os/linux/zfs/sys/zpl.h
index 2b302e9dab07..8b0e79afb0f1 100644
--- a/include/os/linux/zfs/sys/zpl.h
+++ b/include/os/linux/zfs/sys/zpl.h
@@ -180,6 +180,20 @@ zpl_dir_emit_dots(struct file *file, zpl_dir_context_t *ctx)
}
#endif /* HAVE_VFS_ITERATE */
+
+/* zpl_file_range.c */
+
+/*
+ * Handlers for the struct file_operations members of the same name. The
+ * prototypes are declared unconditionally; each definition in
+ * zpl_file_range.c and each assignment in zpl_file_operations is compiled
+ * only under the matching HAVE_VFS_*_FILE_RANGE guard.
+ */
+/* .copy_file_range: takes a size_t length and flags, returns ssize_t */
+extern ssize_t zpl_copy_file_range(struct file *src_file, loff_t src_off,
+ struct file *dst_file, loff_t dst_off, size_t len, unsigned int flags);
+/* .remap_file_range: takes a loff_t length and flags, returns loff_t */
+extern loff_t zpl_remap_file_range(struct file *src_file, loff_t src_off,
+ struct file *dst_file, loff_t dst_off, loff_t len, unsigned int flags);
+/* .clone_file_range: takes a 64-bit length, no flags, returns int */
+extern int zpl_clone_file_range(struct file *src_file, loff_t src_off,
+ struct file *dst_file, loff_t dst_off, uint64_t len);
+/* .dedupe_file_range: takes a 64-bit length, no flags, returns int */
+extern int zpl_dedupe_file_range(struct file *src_file, loff_t src_off,
+ struct file *dst_file, loff_t dst_off, uint64_t len);
+
+
#if defined(HAVE_INODE_TIMESTAMP_TRUNCATE)
#define zpl_inode_timestamp_truncate(ts, ip) timestamp_truncate(ts, ip)
#elif defined(HAVE_INODE_TIMESPEC64_TIMES)
diff --git a/module/Kbuild.in b/module/Kbuild.in
index 485331ac655e..c132171592a8 100644
--- a/module/Kbuild.in
+++ b/module/Kbuild.in
@@ -461,6 +461,7 @@ ZFS_OBJS_OS := \
zpl_ctldir.o \
zpl_export.o \
zpl_file.o \
+ zpl_file_range.o \
zpl_inode.o \
zpl_super.o \
zpl_xattr.o \
diff --git a/module/os/linux/zfs/zpl_file.c b/module/os/linux/zfs/zpl_file.c
index e690525d3cd4..92b603e98a23 100644
--- a/module/os/linux/zfs/zpl_file.c
+++ b/module/os/linux/zfs/zpl_file.c
@@ -1283,7 +1283,6 @@ zpl_compat_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
}
#endif /* CONFIG_COMPAT */
-
const struct address_space_operations zpl_address_space_operations = {
#ifdef HAVE_VFS_READPAGES
.readpages = zpl_readpages,
@@ -1333,6 +1332,18 @@ const struct file_operations zpl_file_operations = {
.aio_fsync = zpl_aio_fsync,
#endif
.fallocate = zpl_fallocate,
+#ifdef HAVE_VFS_COPY_FILE_RANGE
+ .copy_file_range = zpl_copy_file_range,
+#endif
+#ifdef HAVE_VFS_REMAP_FILE_RANGE
+ .remap_file_range = zpl_remap_file_range,
+#endif
+#ifdef HAVE_VFS_CLONE_FILE_RANGE
+ .clone_file_range = zpl_clone_file_range,
+#endif
+#ifdef HAVE_VFS_DEDUPE_FILE_RANGE
+ .dedupe_file_range = zpl_dedupe_file_range,
+#endif
#ifdef HAVE_FILE_FADVISE
.fadvise = zpl_fadvise,
#endif
diff --git a/module/os/linux/zfs/zpl_file_range.c b/module/os/linux/zfs/zpl_file_range.c
new file mode 100644
index 000000000000..db387a748130
--- /dev/null
+++ b/module/os/linux/zfs/zpl_file_range.c
@@ -0,0 +1,183 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or https://opensource.org/licenses/CDDL-1.0.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2023, Klara Inc.
+ */
+
+#ifdef CONFIG_COMPAT
+#include <linux/compat.h>
+#endif
+#include <linux/fs.h>
+#include <sys/file.h>
+#include <sys/zfs_znode.h>
+#include <sys/zfs_vnops.h>
+#include <sys/zfeature.h>
+
+/*
+ * Common worker behind the file-range entry points in this file: clone len
+ * bytes starting at src_off in src_file to dst_off in dst_file by handing
+ * the request to zfs_clone_range().
+ *
+ * Returns -EOPNOTSUPP when SPA_FEATURE_BLOCK_CLONING is not enabled on the
+ * destination's SPA, the negated error when zfs_clone_range() fails, and
+ * otherwise the length zfs_clone_range() leaves in its length argument.
+ *
+ * File offsets are deliberately left untouched; the kernel updates them
+ * itself depending on how it was called.
+ */
+static ssize_t
+__zpl_clone_file_range(struct file *src_file, loff_t src_off,
+    struct file *dst_file, loff_t dst_off, size_t len)
+{
+	struct inode *src_ip = file_inode(src_file);
+	struct inode *dst_ip = file_inode(dst_file);
+	const int two_inodes = (src_ip != dst_ip);
+	/* zfs_clone_range() takes its offsets and length by reference */
+	uint64_t inoff = (uint64_t)src_off;
+	uint64_t outoff = (uint64_t)dst_off;
+	uint64_t nbytes = (uint64_t)len;
+	cred_t *cr = CRED();
+	fstrans_cookie_t cookie;
+	int error;
+
+	if (!spa_feature_is_enabled(
+	    dmu_objset_spa(ITOZSB(dst_ip)->z_os), SPA_FEATURE_BLOCK_CLONING))
+		return (-EOPNOTSUPP);
+
+	/*
+	 * The source lock is taken (shared) before the destination lock, and
+	 * is skipped entirely when both files are the same inode so that the
+	 * inode is only locked once.
+	 */
+	if (two_inodes)
+		spl_inode_lock_shared(src_ip);
+	spl_inode_lock(dst_ip);
+
+	crhold(cr);
+	cookie = spl_fstrans_mark();
+
+	error = -zfs_clone_range(ITOZ(src_ip), &inoff, ITOZ(dst_ip),
+	    &outoff, &nbytes, cr);
+
+	spl_fstrans_unmark(cookie);
+	crfree(cr);
+
+	/* Release in the reverse order of acquisition. */
+	spl_inode_unlock(dst_ip);
+	if (two_inodes)
+		spl_inode_unlock_shared(src_ip);
+
+	return (error < 0 ? error : (ssize_t)nbytes);
+}
+
+#ifdef HAVE_VFS_COPY_FILE_RANGE
+/*
+ * Entry point for copy_file_range(). Copy len bytes from src_off in src_file
+ * to dst_off in dst_file. We are permitted to do this however we like, so we
+ * try to just clone the blocks, and if that is not possible, arrange for a
+ * plain byte copy to happen instead.
+ *
+ * No flags are supported; any non-zero flags value yields -EINVAL.
+ *
+ * Returns the value of __zpl_clone_file_range() when cloning was attempted
+ * and did not fail with -EOPNOTSUPP or -EXDEV. In those two cases the result
+ * depends on the kernel: with generic_copy_file_range() available its return
+ * value is passed on, otherwise -EOPNOTSUPP is returned so that the kernel
+ * performs the copy.
+ */
+ssize_t
+zpl_copy_file_range(struct file *src_file, loff_t src_off,
+    struct file *dst_file, loff_t dst_off, size_t len, unsigned int flags)
+{
+	ssize_t ret;
+
+	if (flags != 0)
+		return (-EINVAL);
+
+	/* Try to do it via zfs_clone_range() */
+	ret = __zpl_clone_file_range(src_file, src_off,
+	    dst_file, dst_off, len);
+
+#ifdef HAVE_VFS_GENERIC_COPY_FILE_RANGE
+	/*
+	 * Since Linux 5.3 the filesystem driver is responsible for executing
+	 * an appropriate fallback, and a generic fallback function is provided.
+	 */
+	if (ret == -EOPNOTSUPP || ret == -EXDEV)
+		ret = generic_copy_file_range(src_file, src_off, dst_file,
+		    dst_off, len, flags);
+#else
+	/*
+	 * Before Linux 5.3 the kernel does the fallback copy itself, but only
+	 * when the handler returns -EOPNOTSUPP; any other error is handed
+	 * straight back to the caller. Translate -EXDEV so that a range which
+	 * cannot be cloned is still copied rather than failing the syscall.
+	 */
+	if (ret == -EXDEV)
+		ret = -EOPNOTSUPP;
+#endif /* HAVE_VFS_GENERIC_COPY_FILE_RANGE */
+
+	return (ret);
+}
+#endif /* HAVE_VFS_COPY_FILE_RANGE */
+
+#ifdef HAVE_VFS_REMAP_FILE_RANGE
+/*
+ * Entry point for FICLONE/FICLONERANGE/FIDEDUPERANGE.
+ *
+ * FICLONE and FICLONERANGE behave like copy_file_range() with one
+ * difference: they have to clone and may never fall back to copying.
+ * FICLONE is simply FICLONERANGE over the whole file, and the kernel sorts
+ * out which one was asked for, so no distinction is made here.
+ *
+ * FIDEDUPERANGE converts a non-clone into a clone: the two ranges are
+ * compared and, if identical, made to share the same storage. That is not
+ * supported yet, so REMAP_FILE_DEDUP is answered with -EOPNOTSUPP.
+ *
+ * Flags other than REMAP_FILE_DEDUP and REMAP_FILE_CAN_SHORTEN are rejected
+ * with -EINVAL. Otherwise the result of __zpl_clone_file_range() is
+ * returned.
+ */
+loff_t
+zpl_remap_file_range(struct file *src_file, loff_t src_off,
+    struct file *dst_file, loff_t dst_off, loff_t len, unsigned int flags)
+{
+	const unsigned int known_flags =
+	    REMAP_FILE_DEDUP | REMAP_FILE_CAN_SHORTEN;
+	loff_t nbytes = len;
+
+	if ((flags & ~known_flags) != 0)
+		return (-EINVAL);
+
+	/*
+	 * REMAP_FILE_CAN_SHORTEN tells us we may clone less than the range we
+	 * were given. It's designed for filesystems that make data past EOF
+	 * available and don't want it to show up in both files. ZFS doesn't
+	 * do that, so the flag is accepted and otherwise ignored.
+	 */
+
+	/* No support for dedup yet */
+	if ((flags & REMAP_FILE_DEDUP) != 0)
+		return (-EOPNOTSUPP);
+
+	/* Zero length means to clone everything to the end of the file */
+	if (nbytes == 0)
+		nbytes = i_size_read(file_inode(src_file)) - src_off;
+
+	return (__zpl_clone_file_range(src_file, src_off,
+	    dst_file, dst_off, nbytes));
+}
+#endif /* HAVE_VFS_REMAP_FILE_RANGE */
+
+#ifdef HAVE_VFS_CLONE_FILE_RANGE
+/*
+ * Entry point for FICLONE and FICLONERANGE, before Linux 4.20.
+ *
+ * This hook reports plain success or failure, not a byte count: it returns
+ * 0 when the whole range was cloned, the negative errno from
+ * __zpl_clone_file_range() when cloning failed, and -EINVAL when fewer
+ * bytes than requested were cloned.
+ */
+int
+zpl_clone_file_range(struct file *src_file, loff_t src_off,
+    struct file *dst_file, loff_t dst_off, uint64_t len)
+{
+	ssize_t ret;
+
+	/* Zero length means to clone everything to the end of the file */
+	if (len == 0)
+		len = i_size_read(file_inode(src_file)) - src_off;
+
+	ret = __zpl_clone_file_range(src_file, src_off,
+	    dst_file, dst_off, len);
+	if (ret < 0)
+		return ((int)ret);
+
+	/*
+	 * The helper returns the number of bytes cloned. Passing that through
+	 * an int would hand a positive value (or a truncated one for large
+	 * ranges) back to the ioctl caller, so collapse it to 0 here. The
+	 * entire length must be cloned or this is an error.
+	 */
+	if ((uint64_t)ret != len)
+		return (-EINVAL);
+
+	return (0);
+}
+#endif /* HAVE_VFS_CLONE_FILE_RANGE */
+
+#ifdef HAVE_VFS_DEDUPE_FILE_RANGE
+/*
+ * Entry point for FIDEDUPERANGE, before Linux 4.20.
+ *
+ * Dedup is not supported yet, so every request is refused with -EOPNOTSUPP
+ * and none of the arguments are looked at.
+ */
+int
+zpl_dedupe_file_range(struct file *src_file, loff_t src_off,
+    struct file *dst_file, loff_t dst_off, uint64_t len)
+{
+	(void) src_file, (void) src_off;
+	(void) dst_file, (void) dst_off;
+	(void) len;
+
+	return (-EOPNOTSUPP);
+}
+#endif /* HAVE_VFS_DEDUPE_FILE_RANGE */