diff options
| author | Ka Ho Ng <khng@FreeBSD.org> | 2021-08-05 15:20:42 +0000 |
|---|---|---|
| committer | Ka Ho Ng <khng@FreeBSD.org> | 2021-08-05 15:20:42 +0000 |
| commit | 0dc332bff200c940edc36c4715b629a2e1e9f9ae (patch) | |
| tree | 0c2995d8ed48879914d30fbc7228f0e718316fad /sys | |
| parent | abbb57d5a647f91847a860bd25b4f109c3abb390 (diff) | |
Diffstat (limited to 'sys')
| -rw-r--r-- | sys/bsm/audit_kevents.h | 1 | ||||
| -rw-r--r-- | sys/compat/freebsd32/freebsd32.h | 4 | ||||
| -rw-r--r-- | sys/compat/freebsd32/freebsd32_misc.c | 34 | ||||
| -rw-r--r-- | sys/compat/freebsd32/syscalls.master | 5 | ||||
| -rw-r--r-- | sys/kern/capabilities.conf | 5 | ||||
| -rw-r--r-- | sys/kern/sys_generic.c | 70 | ||||
| -rw-r--r-- | sys/kern/syscalls.master | 9 | ||||
| -rw-r--r-- | sys/kern/vfs_default.c | 122 | ||||
| -rw-r--r-- | sys/kern/vfs_vnops.c | 110 | ||||
| -rw-r--r-- | sys/kern/vnode_if.src | 11 | ||||
| -rw-r--r-- | sys/security/audit/audit_bsm.c | 12 | ||||
| -rw-r--r-- | sys/sys/fcntl.h | 20 | ||||
| -rw-r--r-- | sys/sys/file.h | 15 | ||||
| -rw-r--r-- | sys/sys/syscallsubr.h | 3 | ||||
| -rw-r--r-- | sys/sys/unistd.h | 1 | ||||
| -rw-r--r-- | sys/sys/vnode.h | 2 |
16 files changed, 424 insertions, 0 deletions
diff --git a/sys/bsm/audit_kevents.h b/sys/bsm/audit_kevents.h index eeb928ecafdc..0da82de1fbcb 100644 --- a/sys/bsm/audit_kevents.h +++ b/sys/bsm/audit_kevents.h @@ -662,6 +662,7 @@ #define AUE_SPECIALFD 43266 /* FreeBSD-specific. */ #define AUE_AIO_WRITEV 43267 /* FreeBSD-specific. */ #define AUE_AIO_READV 43268 /* FreeBSD-specific. */ +#define AUE_FSPACECTL 43269 /* FreeBSD-specific. */ /* * Darwin BSM uses a number of AUE_O_* definitions, which are aliased to the diff --git a/sys/compat/freebsd32/freebsd32.h b/sys/compat/freebsd32/freebsd32.h index 2e4f5155cbf4..8a14a42db813 100644 --- a/sys/compat/freebsd32/freebsd32.h +++ b/sys/compat/freebsd32/freebsd32.h @@ -435,5 +435,9 @@ struct ptrace_coredump32 { uint32_t pc_limit1, pc_limit2; }; +struct spacectl_range32 { + uint32_t r_offset1, r_offset2; + uint32_t r_len1, r_len2; +}; #endif /* !_COMPAT_FREEBSD32_FREEBSD32_H_ */ diff --git a/sys/compat/freebsd32/freebsd32_misc.c b/sys/compat/freebsd32/freebsd32_misc.c index 736fd1123d53..c417a64d286a 100644 --- a/sys/compat/freebsd32/freebsd32_misc.c +++ b/sys/compat/freebsd32/freebsd32_misc.c @@ -3857,3 +3857,37 @@ freebsd32_ntp_adjtime(struct thread *td, struct freebsd32_ntp_adjtime_args *uap) } return (error); } + +int +freebsd32_fspacectl(struct thread *td, struct freebsd32_fspacectl_args *uap) +{ + struct spacectl_range rqsr, rmsr; + struct spacectl_range32 rqsr32, rmsr32; + int error, cerror; + + error = copyin(uap->rqsr, &rqsr32, sizeof(rqsr32)); + if (error != 0) + return (error); + rqsr.r_offset = PAIR32TO64(off_t, rqsr32.r_offset); + rqsr.r_len = PAIR32TO64(off_t, rqsr32.r_len); + + error = kern_fspacectl(td, uap->fd, uap->cmd, &rqsr, uap->flags, + &rmsr); + if (uap->rmsr != NULL) { +#if BYTE_ORDER == LITTLE_ENDIAN + rmsr32.r_offset1 = rmsr.r_offset; + rmsr32.r_offset2 = rmsr.r_offset >> 32; + rmsr32.r_len1 = rmsr.r_len; + rmsr32.r_len2 = rmsr.r_len >> 32; +#else + rmsr32.r_offset1 = rmsr.r_offset >> 32; + rmsr32.r_offset2 = rmsr.r_offset; + rmsr32.r_len1 = rmsr.r_len >> 32; + rmsr32.r_len2 = rmsr.r_len; +#endif + cerror = copyout(&rmsr32, uap->rmsr, sizeof(rmsr32)); + if (error == 0) + error = cerror; + } + return (error); +} diff --git a/sys/compat/freebsd32/syscalls.master b/sys/compat/freebsd32/syscalls.master index aac788bf3956..3e53de2dc966 100644 --- a/sys/compat/freebsd32/syscalls.master +++ b/sys/compat/freebsd32/syscalls.master @@ -1176,5 +1176,10 @@ struct aiocb32 *aiocbp); } 579 AUE_AIO_READV STD { int freebsd32_aio_readv( \ struct aiocb32 *aiocbp); } +580 AUE_FSPACECTL STD { int freebsd32_fspacectl(int fd, \ + int cmd, \ + const struct spacectl_range32 *rqsr, \ + int flags, \ + struct spacectl_range32 *rmsr); } ; vim: syntax=off diff --git a/sys/kern/capabilities.conf b/sys/kern/capabilities.conf index 602ec7088fc6..f53530eb7fa7 100644 --- a/sys/kern/capabilities.conf +++ b/sys/kern/capabilities.conf @@ -229,6 +229,11 @@ freebsd6_pread freebsd6_pwrite ## +## Allow I/O-related file operations, subject to capability rights. +## +fspacectl + +## ## Allow querying file and file system state with fstat(2) and fstatfs(2), ## subject to capability rights. ## diff --git a/sys/kern/sys_generic.c b/sys/kern/sys_generic.c index f86d494400e2..e6b2cba27a04 100644 --- a/sys/kern/sys_generic.c +++ b/sys/kern/sys_generic.c @@ -862,6 +862,76 @@ kern_posix_fallocate(struct thread *td, int fd, off_t offset, off_t len) } int +sys_fspacectl(struct thread *td, struct fspacectl_args *uap) +{ + struct spacectl_range rqsr, rmsr; + int error, cerror; + + error = copyin(uap->rqsr, &rqsr, sizeof(rqsr)); + if (error != 0) + return (error); + + error = kern_fspacectl(td, uap->fd, uap->cmd, &rqsr, uap->flags, + &rmsr); + if (uap->rmsr != NULL) { + cerror = copyout(&rmsr, uap->rmsr, sizeof(rmsr)); + if (error == 0) + error = cerror; + } + return (error); +} + +int +kern_fspacectl(struct thread *td, int fd, int cmd, + const struct spacectl_range *rqsr, int flags, struct spacectl_range *rmsrp) +{ + struct file *fp; + struct spacectl_range rmsr; + int error; + + AUDIT_ARG_FD(fd); + AUDIT_ARG_CMD(cmd); + AUDIT_ARG_FFLAGS(flags); + + if (rqsr == NULL) + return (EINVAL); + rmsr = *rqsr; + if (rmsrp != NULL) + *rmsrp = rmsr; + + if (cmd != SPACECTL_DEALLOC || + rqsr->r_offset < 0 || rqsr->r_len <= 0 || + rqsr->r_offset > OFF_MAX - rqsr->r_len || + (flags & ~SPACECTL_F_SUPPORTED) != 0) + return (EINVAL); + + error = fget_write(td, fd, &cap_pwrite_rights, &fp); + if (error != 0) + return (error); + AUDIT_ARG_FILE(td->td_proc, fp); + if ((fp->f_ops->fo_flags & DFLAG_SEEKABLE) == 0) { + error = ESPIPE; + goto out; + } + if ((fp->f_flag & FWRITE) == 0) { + error = EBADF; + goto out; + } + + error = fo_fspacectl(fp, cmd, &rmsr.r_offset, &rmsr.r_len, flags, + td->td_ucred, td); + /* fspacectl is not restarted after signals if the file is modified. */ + if (rmsr.r_len != rqsr->r_len && (error == ERESTART || + error == EINTR || error == EWOULDBLOCK)) + error = 0; + if (rmsrp != NULL) + *rmsrp = rmsr; +out: + fdrop(fp, td); + return (error); +} + +int kern_specialfd(struct thread *td, int type, void *arg) { struct file *fp; diff --git a/sys/kern/syscalls.master b/sys/kern/syscalls.master index af787908451a..11247aed8fd6 100644 --- a/sys/kern/syscalls.master +++ b/sys/kern/syscalls.master @@ -3250,6 +3250,15 @@ _Inout_ struct aiocb *aiocbp ); } +580 AUE_FSPACECTL STD { + int fspacectl( + int fd, + int cmd, + _In_ const struct spacectl_range *rqsr, + int flags, + _Out_opt_ struct spacectl_range *rmsr, + ); + } ; Please copy any additions and changes to the following compatability tables: ; sys/compat/freebsd32/syscalls.master diff --git a/sys/kern/vfs_default.c b/sys/kern/vfs_default.c index 63bca7810847..c42d5a795935 100644 --- a/sys/kern/vfs_default.c +++ b/sys/kern/vfs_default.c @@ -93,6 +93,7 @@ static int vop_stdgetpages_async(struct vop_getpages_async_args *ap); static int vop_stdread_pgcache(struct vop_read_pgcache_args *ap); static int vop_stdstat(struct vop_stat_args *ap); static int vop_stdvput_pair(struct vop_vput_pair_args *ap); +static int vop_stddeallocate(struct vop_deallocate_args *ap); /* * This vnode table stores what we want to do if the filesystem doesn't @@ -117,6 +118,7 @@ struct vop_vector default_vnodeops = { .vop_advlockasync = vop_stdadvlockasync, .vop_advlockpurge = vop_stdadvlockpurge, .vop_allocate = vop_stdallocate, + .vop_deallocate = vop_stddeallocate, .vop_bmap = vop_stdbmap, .vop_close = VOP_NULL, .vop_fsync = VOP_NULL, @@ -518,6 +520,7 @@ vop_stdpathconf(ap) case _PC_ACL_EXTENDED: case _PC_ACL_NFS4: case _PC_CAP_PRESENT: + case _PC_DEALLOC_PRESENT: case _PC_INF_PRESENT: case _PC_MAC_PRESENT: *ap->a_retval = 0; @@ -1069,6 +1072,125 @@ vop_stdallocate(struct vop_allocate_args *ap) return (error); } +static int +vp_zerofill(struct vnode *vp, struct vattr *vap, off_t *offsetp, off_t *lenp, + struct ucred *cred) +{ + int iosize; + int error = 0; + struct iovec aiov; + struct uio auio; + struct thread *td; + off_t offset, len; + + iosize = vap->va_blocksize; + td = curthread; + offset = *offsetp; + len = *lenp; + + if (iosize == 0) + iosize = BLKDEV_IOSIZE; + /* If va_blocksize is 512 bytes, iosize will be 4 kilobytes */ + iosize = min(iosize * 8, ZERO_REGION_SIZE); + + while (len > 0) { + int xfersize = iosize; + if (offset % iosize != 0) + xfersize -= offset % iosize; + if (xfersize > len) + xfersize = len; + + aiov.iov_base = __DECONST(void *, zero_region); + aiov.iov_len = xfersize; + auio.uio_iov = &aiov; + auio.uio_iovcnt = 1; + auio.uio_offset = offset; + auio.uio_resid = xfersize; + auio.uio_segflg = UIO_SYSSPACE; + auio.uio_rw = UIO_WRITE; + auio.uio_td = td; + + error = VOP_WRITE(vp, &auio, 0, cred); + if (error != 0) { + len -= xfersize - auio.uio_resid; + offset += xfersize - auio.uio_resid; + break; + } + + len -= xfersize; + offset += xfersize; + } + + *offsetp = offset; + *lenp = len; + return (error); +} + +static int +vop_stddeallocate(struct vop_deallocate_args *ap) +{ + struct vnode *vp; + off_t offset, len; + struct ucred *cred; + int error; + struct vattr va; + off_t noff, xfersize, rem; + + vp = ap->a_vp; + offset = *ap->a_offset; + len = *ap->a_len; + cred = ap->a_cred; + + error = VOP_GETATTR(vp, &va, cred); + if (error) + return (error); + + len = omin(OFF_MAX - offset, *ap->a_len); + while (len > 0) { + noff = offset; + error = vn_bmap_seekhole_locked(vp, FIOSEEKDATA, &noff, cred); + if (error) { + if (error != ENXIO) + /* XXX: Is it okay to fallback further? */ + goto out; + + /* + * No more data region to be filled + */ + len = 0; + error = 0; + break; + } + KASSERT(noff >= offset, ("FIOSEEKDATA going backward")); + if (noff != offset) { + xfersize = omin(noff - offset, len); + len -= xfersize; + offset += xfersize; + if (len == 0) + break; + } + error = vn_bmap_seekhole_locked(vp, FIOSEEKHOLE, &noff, cred); + if (error) + goto out; + + /* Fill zeroes */ + xfersize = rem = omin(noff - offset, len); + error = vp_zerofill(vp, &va, &offset, &rem, cred); + if (error) { + len -= xfersize - rem; + goto out; + } + + len -= xfersize; + if (should_yield()) + break; + } +out: + *ap->a_offset = offset; + *ap->a_len = len; + return (error); +} + int vop_stdadvise(struct vop_advise_args *ap) { diff --git a/sys/kern/vfs_vnops.c b/sys/kern/vfs_vnops.c index ccc468d71737..c54f55a99036 100644 --- a/sys/kern/vfs_vnops.c +++ b/sys/kern/vfs_vnops.c @@ -106,6 +106,7 @@ static fo_kqfilter_t vn_kqfilter; static fo_close_t vn_closefile; static fo_mmap_t vn_mmap; static fo_fallocate_t vn_fallocate; +static fo_fspacectl_t vn_fspacectl; struct fileops vnops = { .fo_read = vn_io_fault, @@ -123,6 +124,7 @@ struct fileops vnops = { .fo_fill_kinfo = vn_fill_kinfo, .fo_mmap = vn_mmap, .fo_fallocate = vn_fallocate, + .fo_fspacectl = vn_fspacectl, .fo_flags = DFLAG_PASSABLE | DFLAG_SEEKABLE }; @@ -3439,6 +3441,114 @@ vn_fallocate(struct file *fp, off_t offset, off_t len, struct thread *td) return (error); } +static int +vn_deallocate_impl(struct vnode *vp, off_t *offset, off_t *length, int flags, + int ioflg, struct ucred *active_cred, struct ucred *file_cred) +{ + struct mount *mp; + void *rl_cookie; + off_t off, len; + int error; +#ifdef AUDIT + bool audited_vnode1 = false; +#endif + + rl_cookie = NULL; + error = 0; + mp = NULL; + off = *offset; + len = *length; + + if ((ioflg & (IO_NODELOCKED|IO_RANGELOCKED)) == 0) + rl_cookie = vn_rangelock_wlock(vp, off, off + len); + while (len > 0 && error == 0) { + /* + * Try to deallocate the longest range in one pass. + * In case a pass takes too long to be executed, it returns + * partial result. The residue will be proceeded in the next + * pass. + */ + + if ((ioflg & IO_NODELOCKED) == 0) { + bwillwrite(); + if ((error = vn_start_write(vp, &mp, + V_WAIT | PCATCH)) != 0) + goto out; + vn_lock(vp, vn_lktype_write(mp, vp) | LK_RETRY); + } +#ifdef AUDIT + if (!audited_vnode1) { + AUDIT_ARG_VNODE1(vp); + audited_vnode1 = true; + } +#endif + +#ifdef MAC + if ((ioflg & IO_NOMACCHECK) == 0) + error = mac_vnode_check_write(active_cred, file_cred, + vp); +#endif + if (error == 0) + error = VOP_DEALLOCATE(vp, &off, &len, flags, + active_cred); + + if ((ioflg & IO_NODELOCKED) == 0) { + VOP_UNLOCK(vp); + if (mp != NULL) { + vn_finished_write(mp); + mp = NULL; + } + } + } +out: + if (rl_cookie != NULL) + vn_rangelock_unlock(vp, rl_cookie); + *offset = off; + *length = len; + return (error); +} + +int +vn_deallocate(struct vnode *vp, off_t *offset, off_t *length, int flags, + int ioflg, struct ucred *active_cred, struct ucred *file_cred) +{ + if (*offset < 0 || *length <= 0 || *length > OFF_MAX - *offset || + flags != 0) + return (EINVAL); + if (vp->v_type != VREG) + return (ENODEV); + + return (vn_deallocate_impl(vp, offset, length, flags, ioflg, + active_cred, file_cred)); +} + +static int +vn_fspacectl(struct file *fp, int cmd, off_t *offset, off_t *length, int flags, + struct ucred *active_cred, struct thread *td) +{ + int error; + struct vnode *vp; + + vp = fp->f_vnode; + + if (cmd != SPACECTL_DEALLOC || *offset < 0 || *length <= 0 || + *length > OFF_MAX - *offset || flags != 0) + return (EINVAL); + if (vp->v_type != VREG) + return (ENODEV); + + switch (cmd) { + case SPACECTL_DEALLOC: + error = vn_deallocate_impl(vp, offset, length, flags, 0, + active_cred, fp->f_cred); + break; + default: + panic("vn_fspacectl: unknown cmd %d", cmd); + } + + return (error); +} + static u_long vn_lock_pair_pause_cnt; SYSCTL_ULONG(_debug, OID_AUTO, vn_lock_pair_pause, CTLFLAG_RD, &vn_lock_pair_pause_cnt, 0, diff --git a/sys/kern/vnode_if.src b/sys/kern/vnode_if.src index b506237f385d..97ac1cff6705 100644 --- a/sys/kern/vnode_if.src +++ b/sys/kern/vnode_if.src @@ -801,6 +801,17 @@ vop_vput_pair { }; +%% deallocate vp L L L + +vop_deallocate { + IN struct vnode *vp; + INOUT off_t *offset; + INOUT off_t *len; + IN int flags; + IN struct ucred *cred; +}; + + # The VOPs below are spares at the end of the table to allow new VOPs to be # added in stable branches without breaking the KBI. New VOPs in HEAD should # be added above these spares. When merging a new VOP to a stable branch, diff --git a/sys/security/audit/audit_bsm.c b/sys/security/audit/audit_bsm.c index d350ef3cf3c2..1f9918a42159 100644 --- a/sys/security/audit/audit_bsm.c +++ b/sys/security/audit/audit_bsm.c @@ -1091,6 +1091,18 @@ kaudit_to_bsm(struct kaudit_record *kar, struct au_record **pau) FD_VNODE1_TOKENS; break; + case AUE_FSPACECTL: + if (ARG_IS_VALID(kar, ARG_CMD)) { + tok = au_to_arg32(2, "operation", ar->ar_arg_cmd); + kau_write(rec, tok); + } + if (ARG_IS_VALID(kar, ARG_FFLAGS)) { + tok = au_to_arg32(4, "flags", ar->ar_arg_fflags); + kau_write(rec, tok); + } + FD_VNODE1_TOKENS; + break; + case AUE_RFORK: if (ARG_IS_VALID(kar, ARG_FFLAGS)) { tok = au_to_arg32(1, "flags", ar->ar_arg_fflags); diff --git a/sys/sys/fcntl.h b/sys/sys/fcntl.h index 8fa52aeacafd..934c648aecc0 100644 --- a/sys/sys/fcntl.h +++ b/sys/sys/fcntl.h @@ -323,6 +323,14 @@ struct __oflock { short l_type; /* lock type: read/write, etc. */ short l_whence; /* type of l_start */ }; + +/* + * Space control offset/length description + */ +struct spacectl_range { + off_t r_offset; /* starting offset */ + off_t r_len; /* length */ +}; #endif #if __BSD_VISIBLE @@ -352,6 +360,16 @@ struct __oflock { * similar syscalls. */ #define FD_NONE -200 + +/* + * Commands for fspacectl(2) + */ +#define SPACECTL_DEALLOC 1 /* deallocate space */ + +/* + * fspacectl(2) flags + */ +#define SPACECTL_F_SUPPORTED 0 #endif #ifndef _KERNEL @@ -361,6 +379,8 @@ int creat(const char *, mode_t); int fcntl(int, int, ...); #if __BSD_VISIBLE int flock(int, int); +int fspacectl(int, int, const struct spacectl_range *, int, + struct spacectl_range *); #endif #if __POSIX_VISIBLE >= 200809 int openat(int, const char *, int, ...); diff --git a/sys/sys/file.h b/sys/sys/file.h index b16e23bdfbcf..8a790a25fc6b 100644 --- a/sys/sys/file.h +++ b/sys/sys/file.h @@ -129,6 +129,9 @@ typedef int fo_add_seals_t(struct file *fp, int flags); typedef int fo_get_seals_t(struct file *fp, int *flags); typedef int fo_fallocate_t(struct file *fp, off_t offset, off_t len, struct thread *td); +typedef int fo_fspacectl_t(struct file *fp, int cmd, + off_t *offset, off_t *length, int flags, + struct ucred *active_cred, struct thread *td); typedef int fo_flags_t; struct fileops { @@ -150,6 +153,7 @@ struct fileops { fo_add_seals_t *fo_add_seals; fo_get_seals_t *fo_get_seals; fo_fallocate_t *fo_fallocate; + fo_fspacectl_t *fo_fspacectl; fo_flags_t fo_flags; /* DFLAG_* below */ }; @@ -472,6 +476,17 @@ fo_fallocate(struct file *fp, off_t offset, off_t len, struct thread *td) return ((*fp->f_ops->fo_fallocate)(fp, offset, len, td)); } +static __inline int fo_fspacectl(struct file *fp, int cmd, off_t *offset, + off_t *length, int flags, struct ucred *active_cred, struct thread *td) +{ + + if (fp->f_ops->fo_fspacectl == NULL) + return (ENODEV); + return ((*fp->f_ops->fo_fspacectl)(fp, cmd, offset, length, flags, + active_cred, td)); +} + + #endif /* _KERNEL */ #endif /* !SYS_FILE_H */ diff --git a/sys/sys/syscallsubr.h b/sys/sys/syscallsubr.h index eb7b82946988..dc1b3d6a83de 100644 --- a/sys/sys/syscallsubr.h +++ b/sys/sys/syscallsubr.h @@ -59,6 +59,7 @@ struct rusage; struct sched_param; union semun; struct sockaddr; +struct spacectl_range; struct stat; struct thr_param; struct timex; @@ -233,6 +234,8 @@ int kern_posix_fadvise(struct thread *td, int fd, off_t offset, off_t len, int advice); int kern_posix_fallocate(struct thread *td, int fd, off_t offset, off_t len); +int kern_fspacectl(struct thread *td, int fd, int cmd, + const struct spacectl_range *, int flags, struct spacectl_range *); int kern_procctl(struct thread *td, enum idtype idtype, id_t id, int com, void *data); int kern_pread(struct thread *td, int fd, void *buf, size_t nbyte, diff --git a/sys/sys/unistd.h b/sys/sys/unistd.h index 3b3de3aa33bc..ef8835a812d2 100644 --- a/sys/sys/unistd.h +++ b/sys/sys/unistd.h @@ -156,6 +156,7 @@ #define _PC_INF_PRESENT 62 #define _PC_MAC_PRESENT 63 #define _PC_ACL_NFS4 64 +#define _PC_DEALLOC_PRESENT 65 #endif /* From OpenSolaris, used by SEEK_DATA/SEEK_HOLE. */ diff --git a/sys/sys/vnode.h b/sys/sys/vnode.h index 702fd6623e6a..56591a8d8a8d 100644 --- a/sys/sys/vnode.h +++ b/sys/sys/vnode.h @@ -741,6 +741,8 @@ int vn_copy_file_range(struct vnode *invp, off_t *inoffp, struct vnode *outvp, off_t *outoffp, size_t *lenp, unsigned int flags, struct ucred *incred, struct ucred *outcred, struct thread *fsize_td); +int vn_deallocate(struct vnode *vp, off_t *offset, off_t *length, int flags, + int ioflg, struct ucred *active_cred, struct ucred *file_cred); void vn_finished_write(struct mount *mp); void vn_finished_secondary_write(struct mount *mp); int vn_fsync_buf(struct vnode *vp, int waitfor); |
