aboutsummaryrefslogtreecommitdiff
path: root/sys/kern/vfs_lookup.c
diff options
context:
space:
mode:
Diffstat (limited to 'sys/kern/vfs_lookup.c')
-rw-r--r--sys/kern/vfs_lookup.c177
1 files changed, 108 insertions, 69 deletions
diff --git a/sys/kern/vfs_lookup.c b/sys/kern/vfs_lookup.c
index 86c7bdaa02c0..fb3e6a7a2534 100644
--- a/sys/kern/vfs_lookup.c
+++ b/sys/kern/vfs_lookup.c
@@ -75,14 +75,20 @@ static void NDVALIDATE_impl(struct nameidata *, int);
#endif
/*
+ * Reset ndp to its original state.
+ */
+#define NDRESET(ndp) do { \
+ NDREINIT_DBG(ndp); \
+ ndp->ni_resflags = 0; \
+ ndp->ni_cnd.cn_flags &= ~NAMEI_INTERNAL_FLAGS; \
+} while (0)
+/*
* Prepare namei() to restart. Reset components to its original state and set
* ISRESTARTED flag which signals the underlying lookup code to change the root
* from ABI root to actual root and prevents a further restarts.
*/
#define NDRESTART(ndp) do { \
- NDREINIT_DBG(ndp); \
- ndp->ni_resflags = 0; \
- ndp->ni_cnd.cn_flags &= ~NAMEI_INTERNAL_FLAGS; \
+ NDRESET(ndp); \
ndp->ni_cnd.cn_flags |= ISRESTARTED; \
} while (0)
@@ -162,8 +168,8 @@ static struct vop_vector crossmp_vnodeops = {
*/
struct nameicap_tracker {
- struct vnode *dp;
TAILQ_ENTRY(nameicap_tracker) nm_link;
+ struct mount *mp;
};
/* Zone for cap mode tracker elements used for dotdot capability checks. */
@@ -192,49 +198,75 @@ SYSCTL_INT(_vfs, OID_AUTO, lookup_cap_dotdot_nonlocal, CTLFLAG_RWTUN,
"enables \"..\" components in path lookup in capability mode "
"on non-local mount");
-static void
+static int
nameicap_tracker_add(struct nameidata *ndp, struct vnode *dp)
{
struct nameicap_tracker *nt;
+ struct mount *mp;
+ int error;
if ((ndp->ni_lcf & NI_LCF_CAP_DOTDOT) == 0 || dp->v_type != VDIR)
- return;
+ return (0);
+ mp = NULL;
+ error = VOP_GETWRITEMOUNT(dp, &mp);
+ if (error != 0)
+ return (error);
nt = TAILQ_LAST(&ndp->ni_cap_tracker, nameicap_tracker_head);
- if (nt != NULL && nt->dp == dp)
- return;
+ if (nt != NULL && nt->mp == mp) {
+ vfs_rel(mp);
+ return (0);
+ }
nt = malloc(sizeof(*nt), M_NAMEITRACKER, M_WAITOK);
- vhold(dp);
- nt->dp = dp;
- TAILQ_INSERT_TAIL(&ndp->ni_cap_tracker, nt, nm_link);
+ nt->mp = mp;
+ error = lockmgr(&mp->mnt_renamelock, LK_SHARED | LK_NOWAIT, 0);
+ if (error != 0) {
+ MPASS(ndp->ni_nctrack_mnt == NULL);
+ ndp->ni_nctrack_mnt = mp;
+ free(nt, M_NAMEITRACKER);
+ error = ERESTART;
+ } else {
+ TAILQ_INSERT_TAIL(&ndp->ni_cap_tracker, nt, nm_link);
+ }
+ return (error);
}
static void
-nameicap_cleanup_from(struct nameidata *ndp, struct nameicap_tracker *first)
+nameicap_cleanup(struct nameidata *ndp, int error)
{
struct nameicap_tracker *nt, *nt1;
+ struct mount *mp;
+
+ KASSERT((ndp->ni_nctrack_mnt == NULL &&
+ TAILQ_EMPTY(&ndp->ni_cap_tracker)) ||
+ (ndp->ni_lcf & NI_LCF_CAP_DOTDOT) != 0,
+ ("tracker active and not strictrelative"));
- nt = first;
- TAILQ_FOREACH_FROM_SAFE(nt, &ndp->ni_cap_tracker, nm_link, nt1) {
+ TAILQ_FOREACH_SAFE(nt, &ndp->ni_cap_tracker, nm_link, nt1) {
+ mp = nt->mp;
+ lockmgr(&mp->mnt_renamelock, LK_RELEASE, 0);
+ vfs_rel(mp);
TAILQ_REMOVE(&ndp->ni_cap_tracker, nt, nm_link);
- vdrop(nt->dp);
free(nt, M_NAMEITRACKER);
}
-}
-static void
-nameicap_cleanup(struct nameidata *ndp)
-{
- KASSERT(TAILQ_EMPTY(&ndp->ni_cap_tracker) ||
- (ndp->ni_lcf & NI_LCF_CAP_DOTDOT) != 0, ("not strictrelative"));
- nameicap_cleanup_from(ndp, NULL);
+ mp = ndp->ni_nctrack_mnt;
+ if (mp != NULL) {
+ if (error == ERESTART) {
+ lockmgr(&mp->mnt_renamelock, LK_EXCLUSIVE, 0);
+ lockmgr(&mp->mnt_renamelock, LK_RELEASE, 0);
+ }
+ vfs_rel(mp);
+ ndp->ni_nctrack_mnt = NULL;
+ }
}
/*
- * For dotdot lookups in capability mode, only allow the component
- * lookup to succeed if the resulting directory was already traversed
- * during the operation. This catches situations where already
- * traversed directory is moved to different parent, and then we walk
- * over it with dotdots.
+ * For dotdot lookups in capability mode, disallow walking over the
+ * directory no_rbeneath_dpp that was used as the starting point of
+ * the lookup. Since we take the mnt_renamelocks of all mounts we
+ * ever walked over during lookup, parallel renames are disabled.
+ * This prevents the situation where we circumvent walk over
+ * ni_rbeneath_dpp following dotdots.
*
* Also allow to force failure of dotdot lookups for non-local
* filesystems, where external agents might assist local lookups to
@@ -243,7 +275,6 @@ nameicap_cleanup(struct nameidata *ndp)
static int
nameicap_check_dotdot(struct nameidata *ndp, struct vnode *dp)
{
- struct nameicap_tracker *nt;
struct mount *mp;
if (dp == NULL || dp->v_type != VDIR || (ndp->ni_lcf &
@@ -253,22 +284,16 @@ nameicap_check_dotdot(struct nameidata *ndp, struct vnode *dp)
NI_LCF_CAP_DOTDOT_KTR)) == NI_LCF_STRICTREL_KTR))
NI_CAP_VIOLATION(ndp, ndp->ni_cnd.cn_pnbuf);
if ((ndp->ni_lcf & NI_LCF_CAP_DOTDOT) == 0)
- return (ENOTCAPABLE);
+ goto violation;
+ if (dp == ndp->ni_rbeneath_dpp)
+ goto violation;
mp = dp->v_mount;
if (lookup_cap_dotdot_nonlocal == 0 && mp != NULL &&
(mp->mnt_flag & MNT_LOCAL) == 0)
- goto capfail;
- TAILQ_FOREACH_REVERSE(nt, &ndp->ni_cap_tracker, nameicap_tracker_head,
- nm_link) {
- if (dp == nt->dp) {
- nt = TAILQ_NEXT(nt, nm_link);
- if (nt != NULL)
- nameicap_cleanup_from(ndp, nt);
- return (0);
- }
- }
+ goto violation;
+ return (0);
-capfail:
+violation:
if (__predict_false((ndp->ni_lcf & NI_LCF_STRICTREL_KTR) != 0))
NI_CAP_VIOLATION(ndp, ndp->ni_cnd.cn_pnbuf);
return (ENOTCAPABLE);
@@ -394,6 +419,8 @@ namei_setup(struct nameidata *ndp, struct vnode **dpp, struct pwd **pwdp)
NI_LCF_CAP_DOTDOT;
}
}
+ if (error == 0 && (ndp->ni_lcf & NI_LCF_STRICTREL) != 0)
+ ndp->ni_rbeneath_dpp = *dpp;
/*
* If we are auditing the kernel pathname, save the user pathname.
@@ -631,6 +658,7 @@ restart:
error = namei_getpath(ndp);
if (__predict_false(error != 0)) {
namei_cleanup_cnp(cnp);
+ nameicap_cleanup(ndp, error);
SDT_PROBE4(vfs, namei, lookup, return, error, NULL,
false, ndp);
return (error);
@@ -661,12 +689,12 @@ restart:
else if (__predict_false(pwd->pwd_adir != pwd->pwd_rdir &&
(cnp->cn_flags & ISRESTARTED) == 0)) {
namei_cleanup_cnp(cnp);
+ nameicap_cleanup(ndp, ERESTART);
NDRESTART(ndp);
goto restart;
}
return (error);
case CACHE_FPL_STATUS_PARTIAL:
- TAILQ_INIT(&ndp->ni_cap_tracker);
dp = ndp->ni_startdir;
break;
case CACHE_FPL_STATUS_DESTROYED:
@@ -674,18 +702,21 @@ restart:
error = namei_getpath(ndp);
if (__predict_false(error != 0)) {
namei_cleanup_cnp(cnp);
+ nameicap_cleanup(ndp, error);
return (error);
}
cnp->cn_nameptr = cnp->cn_pnbuf;
/* FALLTHROUGH */
case CACHE_FPL_STATUS_ABORTED:
- TAILQ_INIT(&ndp->ni_cap_tracker);
MPASS(ndp->ni_lcf == 0);
if (*cnp->cn_pnbuf == '\0') {
if ((cnp->cn_flags & EMPTYPATH) != 0) {
- return (namei_emptypath(ndp));
+ error = namei_emptypath(ndp);
+ nameicap_cleanup(ndp, error);
+ return (error);
}
namei_cleanup_cnp(cnp);
+ nameicap_cleanup(ndp, ENOENT);
SDT_PROBE4(vfs, namei, lookup, return, ENOENT, NULL,
false, ndp);
return (ENOENT);
@@ -693,6 +724,7 @@ restart:
error = namei_setup(ndp, &dp, &pwd);
if (error != 0) {
namei_cleanup_cnp(cnp);
+ nameicap_cleanup(ndp, error);
return (error);
}
break;
@@ -705,16 +737,23 @@ restart:
ndp->ni_startdir = dp;
error = vfs_lookup(ndp);
if (error != 0) {
- if (__predict_false(pwd->pwd_adir != pwd->pwd_rdir &&
- error == ENOENT &&
- (cnp->cn_flags & ISRESTARTED) == 0)) {
- nameicap_cleanup(ndp);
- pwd_drop(pwd);
- namei_cleanup_cnp(cnp);
- NDRESTART(ndp);
- goto restart;
- } else
+ uint64_t was_restarted;
+ bool abi_restart;
+
+ was_restarted = ndp->ni_cnd.cn_flags &
+ ISRESTARTED;
+ abi_restart = pwd->pwd_adir != pwd->pwd_rdir &&
+ error == ENOENT && was_restarted == 0;
+ if (error != ERESTART && !abi_restart)
goto out;
+ nameicap_cleanup(ndp, error);
+ pwd_drop(pwd);
+ namei_cleanup_cnp(cnp);
+ NDRESET(ndp);
+ if (abi_restart)
+ was_restarted = ISRESTARTED;
+ ndp->ni_cnd.cn_flags |= was_restarted;
+ goto restart;
}
/*
@@ -723,7 +762,7 @@ restart:
if ((cnp->cn_flags & ISSYMLINK) == 0) {
SDT_PROBE4(vfs, namei, lookup, return, error,
ndp->ni_vp, false, ndp);
- nameicap_cleanup(ndp);
+ nameicap_cleanup(ndp, 0);
pwd_drop(pwd);
NDVALIDATE(ndp);
return (0);
@@ -756,10 +795,10 @@ restart:
ndp->ni_vp = NULL;
vrele(ndp->ni_dvp);
out:
- MPASS(error != 0);
+ MPASS(error != 0 && error != ERESTART);
SDT_PROBE4(vfs, namei, lookup, return, error, NULL, false, ndp);
namei_cleanup_cnp(cnp);
- nameicap_cleanup(ndp);
+ nameicap_cleanup(ndp, error);
pwd_drop(pwd);
return (error);
}
@@ -1185,7 +1224,9 @@ dirloop:
}
}
- nameicap_tracker_add(ndp, dp);
+ error = nameicap_tracker_add(ndp, dp);
+ if (error != 0)
+ goto bad;
/*
* Make sure degenerate names don't get here, their handling was
@@ -1210,9 +1251,7 @@ dirloop:
* the jail or chroot, don't let them out.
* 5. If doing a capability lookup and lookup_cap_dotdot is
* enabled, return ENOTCAPABLE if the lookup would escape
- * from the initial file descriptor directory. Checks are
- * done by ensuring that namei() already traversed the
- * result of dotdot lookup.
+ * from the initial file descriptor directory.
*/
if (cnp->cn_flags & ISDOTDOT) {
if (__predict_false((ndp->ni_lcf & (NI_LCF_STRICTREL_KTR |
@@ -1238,7 +1277,7 @@ dirloop:
NI_CAP_VIOLATION(ndp, cnp->cn_pnbuf);
if ((ndp->ni_lcf & NI_LCF_STRICTREL) != 0) {
error = ENOTCAPABLE;
- goto capdotdot;
+ goto bad;
}
}
if (isroot || ((dp->v_vflag & VV_ROOT) != 0 &&
@@ -1261,11 +1300,6 @@ dirloop:
vn_lock(dp,
enforce_lkflags(dp->v_mount, cnp->cn_lkflags |
LK_RETRY));
- error = nameicap_check_dotdot(ndp, dp);
- if (error != 0) {
-capdotdot:
- goto bad;
- }
}
}
@@ -1314,7 +1348,9 @@ unionlookup:
vn_lock(dp,
enforce_lkflags(dp->v_mount, cnp->cn_lkflags |
LK_RETRY));
- nameicap_tracker_add(ndp, dp);
+ error = nameicap_tracker_add(ndp, dp);
+ if (error != 0)
+ goto bad;
goto unionlookup;
}
@@ -1415,7 +1451,7 @@ nextname:
goto dirloop;
}
if (cnp->cn_flags & ISDOTDOT) {
- error = nameicap_check_dotdot(ndp, ndp->ni_vp);
+ error = nameicap_check_dotdot(ndp, ndp->ni_dvp);
if (error != 0)
goto bad2;
}
@@ -1485,8 +1521,11 @@ success:
}
success_right_lock:
if (ndp->ni_vp != NULL) {
- if ((cnp->cn_flags & ISDOTDOT) == 0)
- nameicap_tracker_add(ndp, ndp->ni_vp);
+ if ((cnp->cn_flags & ISDOTDOT) == 0) {
+ error = nameicap_tracker_add(ndp, ndp->ni_vp);
+ if (error != 0)
+ goto bad2;
+ }
if ((cnp->cn_flags & (FAILIFEXISTS | ISSYMLINK)) == FAILIFEXISTS)
return (vfs_lookup_failifexists(ndp));
}