/* [Feb 1997 T. Schoebel-Theuer] Complete rewrite of the pathname * lookup logic.
*/ /* [Feb-Apr 2000, AV] Rewrite to the new namespace architecture.
*/
/* [Feb-1997 T. Schoebel-Theuer] * Fundamental changes in the pathname lookup mechanisms (namei) * were necessary because of omirr. The reason is that omirr needs * to know the _real_ pathname, not the user-supplied one, in case * of symlinks (and also when transname replacements occur). * * The new code replaces the old recursive symlink resolution with * an iterative one (in case of non-nested symlink chains). It does * this with calls to <fs>_follow_link(). * As a side effect, dir_namei(), _namei() and follow_link() are now * replaced with a single function lookup_dentry() that can handle all * the special cases of the former code. * * With the new dcache, the pathname is stored at each inode, at least as * long as the refcount of the inode is positive. As a side effect, the * size of the dcache depends on the inode cache and thus is dynamic. * * [29-Apr-1998 C. Scott Ananian] Updated above description of symlink * resolution to correspond with current state of the code. * * Note that the symlink resolution is not *completely* iterative. * There is still a significant amount of tail- and mid- recursion in * the algorithm. Also, note that <fs>_readlink() is not used in * lookup_dentry(): lookup_dentry() on the result of <fs>_readlink() * may return different results than <fs>_follow_link(). Many virtual * filesystems (including /proc) exhibit this behavior.
*/
/* [24-Feb-97 T. Schoebel-Theuer] Side effects caused by new implementation: * New symlink semantics: when open() is called with flags O_CREAT | O_EXCL * and the name already exists in form of a symlink, try to create the new * name indicated by the symlink. The old code always complained that the * name already exists, due to not following the symlink even if its target * is nonexistent. The new semantics affects also mknod() and link() when * the name is a symlink pointing to a non-existent name. * * I don't know which semantics is the right one, since I have no access * to standards. But I found by trial that HP-UX 9.0 has the full "new" * semantics implemented, while SunOS 4.1.1 and Solaris (SunOS 5.4) have the * "old" one. Personally, I think the new semantics is much more logical. * Note that "ln old new" where "new" is a symlink pointing to a non-existing * file does succeed in both HP-UX and SunOs, but not in Solaris * and in the old Linux semantics.
*/
/* [16-Dec-97 Kevin Buhr] For security reasons, we change some symlink * semantics. See the comments in "open_namei" and "do_link" below. * * [10-Sep-98 Alan Modra] Another symlink change.
*/
/* [Feb-Apr 2000 AV] Complete rewrite. Rules for symlinks: * inside the path - always follow. * in the last component in creation/removal/renaming - never follow. * if LOOKUP_FOLLOW passed - follow. * if the pathname has trailing slashes - follow. * otherwise - don't follow. * (applied in that order). * * [Jun 2000 AV] Inconsistent behaviour of open() in case if flags==O_CREAT * restored for 2.4. This is the last surviving part of old 4.2BSD bug. * During the 2.4 we need to fix the userland stuff depending on it - * hopefully we will be able to get rid of that wart in 2.5. So far only * XEmacs seems to be relying on it...
*/ /* * [Sep 2001 AV] Single-semaphore locking scheme (kudos to David Holland) * implemented. Let's see if raised priority of ->s_vfs_rename_mutex gives * any extra contention...
*/
/* In order to reduce some races, while at the same time doing additional * checking and hopefully speeding things up, we copy filenames to the * kernel data space before using them.. * * POSIX.1 2.4: an empty pathname is invalid (ENOENT). * PATH_MAX includes the nul terminator --RR.
*/
struct filename *
getname_flags(constchar __user *filename, int flags)
{ struct filename *result; char *kname; int len;
result = audit_reusename(filename); if (result) return result;
result = __getname(); if (unlikely(!result)) return ERR_PTR(-ENOMEM);
/* * First, try to embed the struct filename inside the names_cache * allocation
*/
kname = (char *)result->iname;
result->name = kname;
len = strncpy_from_user(kname, filename, EMBEDDED_NAME_MAX); /* * Handle both empty path and copy failure in one go.
*/ if (unlikely(len <= 0)) { if (unlikely(len < 0)) {
__putname(result); return ERR_PTR(len);
}
/* The empty path is special. */ if (!(flags & LOOKUP_EMPTY)) {
__putname(result); return ERR_PTR(-ENOENT);
}
}
/* * Uh-oh. We have a name that's approaching PATH_MAX. Allocate a * separate struct filename so we can dedicate the entire * names_cache allocation for the pathname, and re-do the copy from * userland.
*/ if (unlikely(len == EMBEDDED_NAME_MAX)) { const size_t size = offsetof(struct filename, iname[1]);
kname = (char *)result;
/* * size is chosen that way we to guarantee that * result->iname[0] is within the same object and that * kname can't be equal to result->iname, no matter what.
*/
result = kzalloc(size, GFP_KERNEL); if (unlikely(!result)) {
__putname(kname); return ERR_PTR(-ENOMEM);
}
result->name = kname;
len = strncpy_from_user(kname, filename, PATH_MAX); if (unlikely(len < 0)) {
__putname(kname);
kfree(result); return ERR_PTR(len);
} /* The empty path is special. */ if (unlikely(!len) && !(flags & LOOKUP_EMPTY)) {
__putname(kname);
kfree(result); return ERR_PTR(-ENOENT);
} if (unlikely(len == PATH_MAX)) {
__putname(kname);
kfree(result); return ERR_PTR(-ENAMETOOLONG);
}
}
initname(result, filename);
audit_getname(result); return result;
}
struct filename *getname_uflags(constchar __user *filename, int uflags)
{ int flags = (uflags & AT_EMPTY_PATH) ? LOOKUP_EMPTY : 0;
/**
 * check_acl - perform ACL permission checking
 * @idmap: idmap of the mount the inode was found from
 * @inode: inode to check permissions on
 * @mask: right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC ...)
 *
 * This function performs the ACL permission checking. Since this function
 * retrieve POSIX acls it needs to know whether it is called from a blocking or
 * non-blocking context and thus cares about the MAY_NOT_BLOCK bit.
 *
 * If the inode has been found through an idmapped mount the idmap of
 * the vfsmount must be passed through @idmap. This function will then take
 * care to map the inode according to @idmap before checking permissions.
 * On non-idmapped mounts or if permission checking is to be performed on the
 * raw inode simply pass @nop_mnt_idmap.
 */
static int check_acl(struct mnt_idmap *idmap,
		     struct inode *inode, int mask)
{
#ifdef CONFIG_FS_POSIX_ACL
	struct posix_acl *acl;

	if (mask & MAY_NOT_BLOCK) {
		acl = get_cached_acl_rcu(inode, ACL_TYPE_ACCESS);
		if (!acl)
			return -EAGAIN;
		/* no ->get_inode_acl() calls in RCU mode... */
		if (is_uncached_acl(acl))
			return -ECHILD;
		return posix_acl_permission(idmap, inode, acl, mask);
	}

	acl = get_inode_acl(inode, ACL_TYPE_ACCESS);
	if (IS_ERR(acl))
		return PTR_ERR(acl);
	if (acl) {
		int error = posix_acl_permission(idmap, inode, acl, mask);
		posix_acl_release(acl);
		return error;
	}
#endif

	return -EAGAIN;
}
/* * Very quick optimistic "we know we have no ACL's" check. * * Note that this is purely for ACL_TYPE_ACCESS, and purely * for the "we have cached that there are no ACLs" case. * * If this returns true, we know there are no ACLs. But if * it returns false, we might still not have ACLs (it could * be the is_uncached_acl() case).
*/ staticinlinebool no_acl_inode(struct inode *inode)
{ #ifdef CONFIG_FS_POSIX_ACL return likely(!READ_ONCE(inode->i_acl)); #else returntrue; #endif
}
/** * acl_permission_check - perform basic UNIX permission checking * @idmap: idmap of the mount the inode was found from * @inode: inode to check permissions on * @mask: right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC ...) * * This function performs the basic UNIX permission checking. Since this * function may retrieve POSIX acls it needs to know whether it is called from a * blocking or non-blocking context and thus cares about the MAY_NOT_BLOCK bit. * * If the inode has been found through an idmapped mount the idmap of * the vfsmount must be passed through @idmap. This function will then take * care to map the inode according to @idmap before checking permissions. * On non-idmapped mounts or if permission checking is to be performed on the * raw inode simply pass @nop_mnt_idmap.
*/ staticint acl_permission_check(struct mnt_idmap *idmap, struct inode *inode, int mask)
{ unsignedint mode = inode->i_mode;
vfsuid_t vfsuid;
/* * Common cheap case: everybody has the requested * rights, and there are no ACLs to check. No need * to do any owner/group checks in that case. * * - 'mask&7' is the requested permission bit set * - multiplying by 0111 spreads them out to all of ugo * - '& ~mode' looks for missing inode permission bits * - the '!' is for "no missing permissions" * * After that, we just need to check that there are no * ACL's on the inode - do the 'IS_POSIXACL()' check last * because it will dereference the ->i_sb pointer and we * want to avoid that if at all possible.
*/ if (!((mask & 7) * 0111 & ~mode)) { if (no_acl_inode(inode)) return 0; if (!IS_POSIXACL(inode)) return 0;
}
/* Are we the owner? If so, ACL's don't matter */
vfsuid = i_uid_into_vfsuid(idmap, inode); if (likely(vfsuid_eq_kuid(vfsuid, current_fsuid()))) {
mask &= 7;
mode >>= 6; return (mask & ~mode) ? -EACCES : 0;
}
/* Do we have ACL's? */ if (IS_POSIXACL(inode) && (mode & S_IRWXG)) { int error = check_acl(idmap, inode, mask); if (error != -EAGAIN) return error;
}
/* Only RWX matters for group/other mode bits */
mask &= 7;
/* * Are the group permissions different from * the other permissions in the bits we care * about? Need to check group ownership if so.
*/ if (mask & (mode ^ (mode >> 3))) {
vfsgid_t vfsgid = i_gid_into_vfsgid(idmap, inode); if (vfsgid_in_group_p(vfsgid))
mode >>= 3;
}
/* Bits in 'mode' clear that we require? */ return (mask & ~mode) ? -EACCES : 0;
}
/**
 * generic_permission -  check for access rights on a Posix-like filesystem
 * @idmap:	idmap of the mount the inode was found from
 * @inode:	inode to check access rights for
 * @mask:	right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC,
 *		%MAY_NOT_BLOCK ...)
 *
 * Used to check for read/write/execute permissions on a file.
 * We use "fsuid" for this, letting us set arbitrary permissions
 * for filesystem access without changing the "normal" uids which
 * are used for other things.
 *
 * generic_permission is rcu-walk aware. It returns -ECHILD in case an rcu-walk
 * request cannot be satisfied (eg. requires blocking or too much complexity).
 * It would then be called again in ref-walk mode.
 *
 * If the inode has been found through an idmapped mount the idmap of
 * the vfsmount must be passed through @idmap. This function will then take
 * care to map the inode according to @idmap before checking permissions.
 * On non-idmapped mounts or if permission checking is to be performed on the
 * raw inode simply pass @nop_mnt_idmap.
 */
int generic_permission(struct mnt_idmap *idmap, struct inode *inode,
		       int mask)
{
	int ret;

	/*
	 * Do the basic permission checks.
	 */
	ret = acl_permission_check(idmap, inode, mask);
	if (ret != -EACCES)
		return ret;

	if (S_ISDIR(inode->i_mode)) {
		/* DACs are overridable for directories */
		if (!(mask & MAY_WRITE))
			if (capable_wrt_inode_uidgid(idmap, inode,
						     CAP_DAC_READ_SEARCH))
				return 0;
		if (capable_wrt_inode_uidgid(idmap, inode,
					     CAP_DAC_OVERRIDE))
			return 0;
		return -EACCES;
	}

	/*
	 * Searching includes executable on directories, else just read.
	 */
	mask &= MAY_READ | MAY_WRITE | MAY_EXEC;
	if (mask == MAY_READ)
		if (capable_wrt_inode_uidgid(idmap, inode,
					     CAP_DAC_READ_SEARCH))
			return 0;
	/*
	 * Read/write DACs are always overridable.
	 * Executable DACs are overridable when there is
	 * at least one exec bit set.
	 */
	if (!(mask & MAY_EXEC) || (inode->i_mode & S_IXUGO))
		if (capable_wrt_inode_uidgid(idmap, inode,
					     CAP_DAC_OVERRIDE))
			return 0;

	return ret;
}
EXPORT_SYMBOL(generic_permission);
/**
 * do_inode_permission - UNIX permission checking
 * @idmap: idmap of the mount the inode was found from
 * @inode: inode to check permissions on
 * @mask: right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC ...)
 *
 * We _really_ want to just do "generic_permission()" without
 * even looking at the inode->i_op values. So we keep a cache
 * flag in inode->i_opflags, that says "this has not special
 * permission function, use the fast case".
 */
static inline int do_inode_permission(struct mnt_idmap *idmap,
				      struct inode *inode, int mask)
{
	if (unlikely(!(inode->i_opflags & IOP_FASTPERM))) {
		if (likely(inode->i_op->permission))
			return inode->i_op->permission(idmap, inode, mask);

		/* This gets set once for the inode lifetime */
		spin_lock(&inode->i_lock);
		inode->i_opflags |= IOP_FASTPERM;
		spin_unlock(&inode->i_lock);
	}
	return generic_permission(idmap, inode, mask);
}
/**
 * sb_permission - Check superblock-level permissions
 * @sb: Superblock of inode to check permission on
 * @inode: Inode to check permission on
 * @mask: Right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC)
 *
 * Separate out file-system wide checks from inode-specific permission checks.
 */
static int sb_permission(struct super_block *sb, struct inode *inode, int mask)
{
	if (unlikely(mask & MAY_WRITE)) {
		umode_t mode = inode->i_mode;

		/* Nobody gets write access to a read-only fs. */
		if (sb_rdonly(sb) &&
		    (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)))
			return -EROFS;
	}
	return 0;
}
/**
 * inode_permission - Check for access rights to a given inode
 * @idmap: idmap of the mount the inode was found from
 * @inode: Inode to check permission on
 * @mask: Right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC)
 *
 * Check for read/write/execute permissions on an inode. We use fs[ug]id for
 * this, letting us set arbitrary permissions for filesystem access without
 * changing the "normal" UIDs which are used for other things.
 *
 * When checking for MAY_APPEND, MAY_WRITE must also be set in @mask.
 */
int inode_permission(struct mnt_idmap *idmap,
		     struct inode *inode, int mask)
{
	int retval;

	retval = sb_permission(inode->i_sb, inode, mask);
	if (unlikely(retval))
		return retval;

	if (unlikely(mask & MAY_WRITE)) {
		/*
		 * Nobody gets write access to an immutable file.
		 */
		if (unlikely(IS_IMMUTABLE(inode)))
			return -EPERM;

		/*
		 * Updating mtime will likely cause i_uid and i_gid to be
		 * written back improperly if their true value is unknown
		 * to the vfs.
		 */
		if (unlikely(HAS_UNMAPPED_ID(idmap, inode)))
			return -EACCES;
	}

	retval = do_inode_permission(idmap, inode, mask);
	if (unlikely(retval))
		return retval;

	retval = devcgroup_inode_permission(inode, mask);
	if (unlikely(retval))
		return retval;

	return security_inode_permission(inode, mask);
}
EXPORT_SYMBOL(inode_permission);
/** * path_get - get a reference to a path * @path: path to get the reference to * * Given a path increment the reference count to the dentry and the vfsmount.
*/ void path_get(conststruct path *path)
{
mntget(path->mnt);
dget(path->dentry);
}
EXPORT_SYMBOL(path_get);
/** * path_put - put a reference to a path * @path: path to put the reference to * * Given a path decrement the reference count to the dentry and the vfsmount.
*/ void path_put(conststruct path *path)
{
dput(path->dentry);
mntput(path->mnt);
}
EXPORT_SYMBOL(path_put);
/** * path_connected - Verify that a dentry is below mnt.mnt_root * @mnt: The mountpoint to check. * @dentry: The dentry to check. * * Rename can sometimes move a file or directory outside of a bind * mount, path_connected allows those cases to be detected.
*/ staticbool path_connected(struct vfsmount *mnt, struct dentry *dentry)
{ struct super_block *sb = mnt->mnt_sb;
/* Bind mounts can have disconnected paths */ if (mnt->mnt_root == sb->s_root) returntrue;
return is_subdir(dentry, mnt->mnt_root);
}
/* Run and clear the delayed_call cleanup for every saved symlink on the stack. */
static void drop_links(struct nameidata *nd)
{
	int i = nd->depth;
	while (i--) {
		struct saved *last = nd->stack + i;
		do_delayed_call(&last->done);
		clear_delayed_call(&last->done);
	}
}
staticbool legitimize_links(struct nameidata *nd)
{ int i; if (unlikely(nd->flags & LOOKUP_CACHED)) {
drop_links(nd);
nd->depth = 0; returnfalse;
} for (i = 0; i < nd->depth; i++) { struct saved *last = nd->stack + i; if (unlikely(!legitimize_path(nd, &last->link, last->seq))) {
drop_links(nd);
nd->depth = i + 1; returnfalse;
}
} returntrue;
}
staticbool legitimize_root(struct nameidata *nd)
{ /* Nothing to do if nd->root is zero or is managed by the VFS user. */ if (!nd->root.mnt || (nd->state & ND_ROOT_PRESET)) returntrue;
nd->state |= ND_ROOT_GRABBED; return legitimize_path(nd, &nd->root, nd->root_seq);
}
/* * Path walking has 2 modes, rcu-walk and ref-walk (see * Documentation/filesystems/path-lookup.txt). In situations when we can't * continue in RCU mode, we attempt to drop out of rcu-walk mode and grab * normal reference counts on dentries and vfsmounts to transition to ref-walk * mode. Refcounts are grabbed at the last known good point before rcu-walk * got stuck, so ref-walk may continue from there. If this is not successful * (eg. a seqcount has changed), then failure is returned and it's up to caller * to restart the path walk from the beginning in ref-walk mode.
*/
/** * try_to_unlazy - try to switch to ref-walk mode. * @nd: nameidata pathwalk data * Returns: true on success, false on failure * * try_to_unlazy attempts to legitimize the current nd->path and nd->root * for ref-walk mode. * Must be called from rcu-walk context. * Nothing should touch nameidata between try_to_unlazy() failure and * terminate_walk().
*/ staticbool try_to_unlazy(struct nameidata *nd)
{ struct dentry *parent = nd->path.dentry;
BUG_ON(!(nd->flags & LOOKUP_RCU));
if (unlikely(!legitimize_links(nd))) goto out1; if (unlikely(!legitimize_path(nd, &nd->path, nd->seq))) goto out; if (unlikely(!legitimize_root(nd))) goto out;
leave_rcu(nd);
BUG_ON(nd->inode != parent->d_inode); returntrue;
/** * try_to_unlazy_next - try to switch to ref-walk mode. * @nd: nameidata pathwalk data * @dentry: next dentry to step into * Returns: true on success, false on failure * * Similar to try_to_unlazy(), but here we have the next dentry already * picked by rcu-walk and want to legitimize that in addition to the current * nd->path and nd->root for ref-walk mode. Must be called from rcu-walk context. * Nothing should touch nameidata between try_to_unlazy_next() failure and * terminate_walk().
*/ staticbool try_to_unlazy_next(struct nameidata *nd, struct dentry *dentry)
{ int res;
BUG_ON(!(nd->flags & LOOKUP_RCU));
if (unlikely(!legitimize_links(nd))) goto out2;
res = __legitimize_mnt(nd->path.mnt, nd->m_seq); if (unlikely(res)) { if (res > 0) goto out2; goto out1;
} if (unlikely(!lockref_get_not_dead(&nd->path.dentry->d_lockref))) goto out1;
/* * We need to move both the parent and the dentry from the RCU domain * to be properly refcounted. And the sequence number in the dentry * validates *both* dentry counters, since we checked the sequence * number of the parent after we got the child sequence number. So we * know the parent must still be valid if the child sequence number is
*/ if (unlikely(!lockref_get_not_dead(&dentry->d_lockref))) goto out; if (read_seqcount_retry(&dentry->d_seq, nd->next_seq)) goto out_dput; /* * Sequence counts matched. Now make sure that the root is * still valid and get it if required.
*/ if (unlikely(!legitimize_root(nd))) goto out_dput;
leave_rcu(nd); returntrue;
/** * complete_walk - successful completion of path walk * @nd: pointer nameidata * * If we had been in RCU mode, drop out of it and legitimize nd->path. * Revalidate the final result, unless we'd already done that during * the path walk or the filesystem doesn't ask for it. Return 0 on * success, -error on failure. In case of failure caller does not * need to drop nd->path.
*/ staticint complete_walk(struct nameidata *nd)
{ struct dentry *dentry = nd->path.dentry; int status;
if (nd->flags & LOOKUP_RCU) { /* * We don't want to zero nd->root for scoped-lookups or * externally-managed nd->root.
*/ if (!(nd->state & ND_ROOT_PRESET)) if (!(nd->flags & LOOKUP_IS_SCOPED))
nd->root.mnt = NULL;
nd->flags &= ~LOOKUP_CACHED; if (!try_to_unlazy(nd)) return -ECHILD;
}
if (unlikely(nd->flags & LOOKUP_IS_SCOPED)) { /* * While the guarantee of LOOKUP_IS_SCOPED is (roughly) "don't * ever step outside the root during lookup" and should already * be guaranteed by the rest of namei, we want to avoid a namei * BUG resulting in userspace being given a path that was not * scoped within the root at some point during the lookup. * * So, do a final sanity-check to make sure that in the * worst-case scenario (a complete bypass of LOOKUP_IS_SCOPED) * we won't silently return an fd completely outside of the * requested root to userspace. * * Userspace could move the path outside the root after this * check, but as discussed elsewhere this is not a concern (the * resolved file was inside the root at some point).
*/ if (!path_is_under(&nd->path, &nd->root)) return -EXDEV;
}
if (likely(!(nd->state & ND_JUMPED))) return 0;
if (likely(!(dentry->d_flags & DCACHE_OP_WEAK_REVALIDATE))) return 0;
status = dentry->d_op->d_weak_revalidate(dentry, nd->flags); if (status > 0) return 0;
/* * Jumping to the real root in a scoped-lookup is a BUG in namei, but we * still have to ensure it doesn't happen because it will cause a breakout * from the dirfd.
*/ if (WARN_ON(nd->flags & LOOKUP_IS_SCOPED)) return -ENOTRECOVERABLE;
staticint nd_jump_root(struct nameidata *nd)
{ if (unlikely(nd->flags & LOOKUP_BENEATH)) return -EXDEV; if (unlikely(nd->flags & LOOKUP_NO_XDEV)) { /* Absolute path arguments to path_init() are allowed. */ if (nd->path.mnt != NULL && nd->path.mnt != nd->root.mnt) return -EXDEV;
} if (!nd->root.mnt) { int error = set_root(nd); if (error) return error;
} if (nd->flags & LOOKUP_RCU) { struct dentry *d;
nd->path = nd->root;
d = nd->path.dentry;
nd->inode = d->d_inode;
nd->seq = nd->root_seq; if (read_seqcount_retry(&d->d_seq, nd->seq)) return -ECHILD;
} else {
path_put(&nd->path);
nd->path = nd->root;
path_get(&nd->path);
nd->inode = nd->path.dentry->d_inode;
}
nd->state |= ND_JUMPED; return 0;
}
/* * Helper to directly jump to a known parsed path from ->get_link, * caller must have taken a reference to path beforehand.
*/ int nd_jump_link(conststruct path *path)
{ int error = -ELOOP; struct nameidata *nd = current->nameidata;
if (unlikely(nd->flags & LOOKUP_NO_MAGICLINKS)) goto err;
error = -EXDEV; if (unlikely(nd->flags & LOOKUP_NO_XDEV)) { if (nd->path.mnt != path->mnt) goto err;
} /* Not currently safe for scoped-lookups. */ if (unlikely(nd->flags & LOOKUP_IS_SCOPED)) goto err;
/** * may_follow_link - Check symlink following for unsafe situations * @nd: nameidata pathwalk data * @inode: Used for idmapping. * * In the case of the sysctl_protected_symlinks sysctl being enabled, * CAP_DAC_OVERRIDE needs to be specifically ignored if the symlink is * in a sticky world-writable directory. This is to protect privileged * processes from failing races against path names that may change out * from under them by way of other users creating malicious symlinks. * It will permit symlinks to be followed only when outside a sticky * world-writable directory, or when the uid of the symlink and follower * match, or when the directory owner matches the symlink's owner. * * Returns 0 if following the symlink is allowed, -ve on error.
*/ staticinlineint may_follow_link(struct nameidata *nd, conststruct inode *inode)
{ struct mnt_idmap *idmap;
vfsuid_t vfsuid;
if (!sysctl_protected_symlinks) return 0;
idmap = mnt_idmap(nd->path.mnt);
vfsuid = i_uid_into_vfsuid(idmap, inode); /* Allowed if owner and follower match. */ if (vfsuid_eq_kuid(vfsuid, current_fsuid())) return 0;
/* Allowed if parent directory not sticky and world-writable. */ if ((nd->dir_mode & (S_ISVTX|S_IWOTH)) != (S_ISVTX|S_IWOTH)) return 0;
/* Allowed if parent directory and link owner match. */ if (vfsuid_valid(nd->dir_vfsuid) && vfsuid_eq(nd->dir_vfsuid, vfsuid)) return 0;
/** * safe_hardlink_source - Check for safe hardlink conditions * @idmap: idmap of the mount the inode was found from * @inode: the source inode to hardlink from * * Return false if at least one of the following conditions: * - inode is not a regular file * - inode is setuid * - inode is setgid and group-exec * - access failure for read and write * * Otherwise returns true.
*/ staticbool safe_hardlink_source(struct mnt_idmap *idmap, struct inode *inode)
{
umode_t mode = inode->i_mode;
/* Special files should not get pinned to the filesystem. */ if (!S_ISREG(mode)) returnfalse;
/* Setuid files should not get pinned to the filesystem. */ if (mode & S_ISUID) returnfalse;
/* Executable setgid files should not get pinned to the filesystem. */ if ((mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP)) returnfalse;
/* Hardlinking to unreadable or unwritable sources is dangerous. */ if (inode_permission(idmap, inode, MAY_READ | MAY_WRITE)) returnfalse;
returntrue;
}
/** * may_linkat - Check permissions for creating a hardlink * @idmap: idmap of the mount the inode was found from * @link: the source to hardlink from * * Block hardlink when all of: * - sysctl_protected_hardlinks enabled * - fsuid does not match inode * - hardlink source is unsafe (see safe_hardlink_source() above) * - not CAP_FOWNER in a namespace with the inode owner uid mapped * * If the inode has been found through an idmapped mount the idmap of * the vfsmount must be passed through @idmap. This function will then take * care to map the inode according to @idmap before checking permissions. * On non-idmapped mounts or if permission checking is to be performed on the * raw inode simply pass @nop_mnt_idmap. * * Returns 0 if successful, -ve on error.
*/ int may_linkat(struct mnt_idmap *idmap, conststruct path *link)
{ struct inode *inode = link->dentry->d_inode;
/* Inode writeback is not safe when the uid or gid are invalid. */ if (!vfsuid_valid(i_uid_into_vfsuid(idmap, inode)) ||
!vfsgid_valid(i_gid_into_vfsgid(idmap, inode))) return -EOVERFLOW;
if (!sysctl_protected_hardlinks) return 0;
/* Source inode owner (or CAP_FOWNER) can hardlink all they like, * otherwise, it must be a safe source.
*/ if (safe_hardlink_source(idmap, inode) ||
inode_owner_or_capable(idmap, inode)) return 0;
/**
 * may_create_in_sticky - Check whether an O_CREAT open in a sticky directory
 *			  should be allowed, or not, on files that already
 *			  exist.
 * @idmap: idmap of the mount the inode was found from
 * @nd: nameidata pathwalk data
 * @inode: the inode of the file to open
 *
 * Block an O_CREAT open of a FIFO (or a regular file) when:
 *   - sysctl_protected_fifos (or sysctl_protected_regular) is enabled
 *   - the file already exists
 *   - we are in a sticky directory
 *   - we don't own the file
 *   - the owner of the directory doesn't own the file
 *   - the directory is world writable
 * If the sysctl_protected_fifos (or sysctl_protected_regular) is set to 2
 * the directory doesn't have to be world writable: being group writable will
 * be enough.
 *
 * If the inode has been found through an idmapped mount the idmap of
 * the vfsmount must be passed through @idmap. This function will then take
 * care to map the inode according to @idmap before checking permissions.
 * On non-idmapped mounts or if permission checking is to be performed on the
 * raw inode simply pass @nop_mnt_idmap.
 *
 * Returns 0 if the open is allowed, -ve on error.
 */
static int may_create_in_sticky(struct mnt_idmap *idmap, struct nameidata *nd,
				struct inode *const inode)
{
	umode_t dir_mode = nd->dir_mode;
	vfsuid_t dir_vfsuid = nd->dir_vfsuid, i_vfsuid;

	if (likely(!(dir_mode & S_ISVTX)))
		return 0;

	if (S_ISREG(inode->i_mode) && !sysctl_protected_regular)
		return 0;

	if (S_ISFIFO(inode->i_mode) && !sysctl_protected_fifos)
		return 0;

	i_vfsuid = i_uid_into_vfsuid(idmap, inode);

	if (vfsuid_eq(i_vfsuid, dir_vfsuid))
		return 0;

	if (vfsuid_eq_kuid(i_vfsuid, current_fsuid()))
		return 0;

	if (likely(dir_mode & 0002)) {
		audit_log_path_denied(AUDIT_ANOM_CREAT, "sticky_create");
		return -EACCES;
	}

	if (dir_mode & 0020) {
		if (sysctl_protected_fifos >= 2 && S_ISFIFO(inode->i_mode)) {
			audit_log_path_denied(AUDIT_ANOM_CREAT,
					      "sticky_create_fifo");
			return -EACCES;
		}

		if (sysctl_protected_regular >= 2 && S_ISREG(inode->i_mode)) {
			audit_log_path_denied(AUDIT_ANOM_CREAT,
					      "sticky_create_regular");
			return -EACCES;
		}
	}

	return 0;
}
/* * follow_up - Find the mountpoint of path's vfsmount * * Given a path, find the mountpoint of its source file system. * Replace @path with the path of the mountpoint in the parent mount. * Up is towards /. * * Return 1 if we went up a level and 0 if we were already at the * root.
*/ int follow_up(struct path *path)
{ struct mount *mnt = real_mount(path->mnt); struct mount *parent; struct dentry *mountpoint;
rcu_read_lock(); while (1) { unsigned seq, mseq = read_seqbegin(&mount_lock);
found = choose_mountpoint_rcu(m, root, path, &seq); if (unlikely(!found)) { if (!read_seqretry(&mount_lock, mseq)) break;
} else { if (likely(__legitimize_path(path, seq, mseq))) break;
rcu_read_unlock();
path_put(path);
rcu_read_lock();
}
}
rcu_read_unlock(); return found;
}
/*
 * Perform an automount
 * - return -EISDIR to tell follow_managed() to stop and return the path we
 *   were called with.
 */
static int follow_automount(struct path *path, int *count, unsigned lookup_flags)
{
	struct dentry *dentry = path->dentry;

	/* We don't want to mount if someone's just doing a stat -
	 * unless they're stat'ing a directory and appended a '/' to
	 * the name.
	 *
	 * We do, however, want to mount if someone wants to open or
	 * create a file of any type under the mountpoint, wants to
	 * traverse through the mountpoint or wants to open the
	 * mounted directory.  Also, autofs may mark negative dentries
	 * as being automount points.  These will need the attentions
	 * of the daemon to instantiate them before they can be used.
	 */
	if (!(lookup_flags & (LOOKUP_PARENT | LOOKUP_DIRECTORY |
			   LOOKUP_OPEN | LOOKUP_CREATE | LOOKUP_AUTOMOUNT)) &&
	    dentry->d_inode)
		return -EISDIR;

	/* No need to trigger automounts if mountpoint crossing is disabled. */
	if (lookup_flags & LOOKUP_NO_XDEV)
		return -EXDEV;

	if (count && (*count)++ >= MAXSYMLINKS)
		return -ELOOP;

	return finish_automount(dentry->d_op->d_automount(path), path);
}
/*
 * mount traversal - out-of-line part.  One note on ->d_flags accesses -
 * dentries are pinned but not locked here, so negative dentry can go
 * positive right under us.  Use of smp_load_acquire() provides a barrier
 * sufficient for ->d_inode and ->d_flags consistency.
 */
static int __traverse_mounts(struct path *path, unsigned flags, bool *jumped,
			     int *count, unsigned lookup_flags)
{
	struct vfsmount *mnt = path->mnt;
	bool need_mntput = false;
	int ret = 0;

	while (flags & DCACHE_MANAGED_DENTRY) {
		/* Allow the filesystem to manage the transit without i_rwsem
		 * being held. */
		if (flags & DCACHE_MANAGE_TRANSIT) {
			if (lookup_flags & LOOKUP_NO_XDEV) {
				ret = -EXDEV;
				break;
			}
			ret = path->dentry->d_op->d_manage(path, false);
			flags = smp_load_acquire(&path->dentry->d_flags);
			if (ret < 0)
				break;
		}

		if (flags & DCACHE_MOUNTED) {	// something's mounted on it..
			struct vfsmount *mounted = lookup_mnt(path);
			if (mounted) {		// ... in our namespace
				dput(path->dentry);
				if (need_mntput)
					mntput(path->mnt);
				path->mnt = mounted;
				path->dentry = dget(mounted->mnt_root);
				// here we know it's positive
				flags = path->dentry->d_flags;
				need_mntput = true;
				continue;
			}
		}

		if (!(flags & DCACHE_NEED_AUTOMOUNT))
			break;

		// uncovered automount point
		ret = follow_automount(path, count, lookup_flags);
		flags = smp_load_acquire(&path->dentry->d_flags);
		if (ret < 0)
			break;
	}

	if (ret == -EISDIR)
		ret = 0;
	// possible if you race with several mount --move
	if (need_mntput && path->mnt == mnt)
		mntput(path->mnt);
	if (!ret && unlikely(d_flags_negative(flags)))
		ret = -ENOENT;
	*jumped = need_mntput;
	return ret;
}

/*
 * Inline fast path for the common unmanaged-dentry case; falls back to
 * __traverse_mounts() for managed dentries.  (Restored: called by
 * follow_down() below but missing from the mangled source.)
 */
static inline int traverse_mounts(struct path *path, bool *jumped,
				  int *count, unsigned lookup_flags)
{
	unsigned flags = smp_load_acquire(&path->dentry->d_flags);

	/* fastpath */
	if (likely(!(flags & DCACHE_MANAGED_DENTRY))) {
		*jumped = false;
		if (unlikely(d_flags_negative(flags)))
			return -ENOENT;
		return 0;
	}
	return __traverse_mounts(path, flags, jumped, count, lookup_flags);
}
/* * Follow down to the covering mount currently visible to userspace. At each * point, the filesystem owning that dentry may be queried as to whether the * caller is permitted to proceed or not.
*/ int follow_down(struct path *path, unsignedint flags)
{ struct vfsmount *mnt = path->mnt; bool jumped; int ret = traverse_mounts(path, &jumped, NULL, flags);
if (path->mnt != mnt)
mntput(mnt); return ret;
}
EXPORT_SYMBOL(follow_down);
/*
 * Try to skip to top of mountpoint pile in rcuwalk mode.  Fail if
 * we meet a managed dentry that would need blocking.
 *
 * Returns true if *path now refers to the topmost mount (or needed no
 * traversal at all), false if the caller must fall back to non-RCU
 * walk (or fail, for LOOKUP_NO_XDEV).  On each mount crossing,
 * nd->next_seq is resampled from the new root dentry, and mount_lock
 * is rechecked so that a non-RCU walk could legitimately have reached
 * the same state.
 */
static bool __follow_mount_rcu(struct nameidata *nd, struct path *path)
{
	struct dentry *dentry = path->dentry;
	unsigned int flags = dentry->d_flags;

	/* common case: nothing managed/mounted here */
	if (likely(!(flags & DCACHE_MANAGED_DENTRY)))
		return true;

	/* crossing a mount is forbidden for this walk */
	if (unlikely(nd->flags & LOOKUP_NO_XDEV))
		return false;

	for (;;) {
		/*
		 * Don't forget we might have a non-mountpoint managed dentry
		 * that wants to block transit.
		 */
		if (unlikely(flags & DCACHE_MANAGE_TRANSIT)) {
			/* rcu==true: d_manage must not block; -EISDIR means
			 * "stop here, treat as plain directory" */
			int res = dentry->d_op->d_manage(path, true);
			if (res)
				return res == -EISDIR;
			flags = dentry->d_flags;
		}

		if (flags & DCACHE_MOUNTED) {
			struct mount *mounted = __lookup_mnt(path->mnt, dentry);
			if (mounted) {
				path->mnt = &mounted->mnt;
				dentry = path->dentry = mounted->mnt.mnt_root;
				nd->state |= ND_JUMPED;
				nd->next_seq = read_seqcount_begin(&dentry->d_seq);
				flags = dentry->d_flags;
				// makes sure that non-RCU pathwalk could reach
				// this state.
				if (read_seqretry(&mount_lock, nd->m_seq))
					return false;
				continue;
			}
			if (read_seqretry(&mount_lock, nd->m_seq))
				return false;
		}
		/* stop unless an automount still needs triggering (which
		 * cannot be done in RCU mode) */
		return !(flags & DCACHE_NEED_AUTOMOUNT);
	}
}
path->mnt = nd->path.mnt;
path->dentry = dentry; if (nd->flags & LOOKUP_RCU) { unsignedint seq = nd->next_seq; if (likely(__follow_mount_rcu(nd, path))) return 0; // *path and nd->next_seq might've been clobbered
path->mnt = nd->path.mnt;
path->dentry = dentry;
nd->next_seq = seq; if (!try_to_unlazy_next(nd, dentry)) return -ECHILD;
}
ret = traverse_mounts(path, &jumped, &nd->total_link_count, nd->flags); if (jumped) { if (unlikely(nd->flags & LOOKUP_NO_XDEV))
ret = -EXDEV; else
nd->state |= ND_JUMPED;
} if (unlikely(ret)) {
dput(path->dentry); if (path->mnt != nd->path.mnt)
mntput(path->mnt);
} return ret;
}
/* * This looks up the name in dcache and possibly revalidates the found dentry. * NULL is returned if the dentry does not exist in the cache.
*/ staticstruct dentry *lookup_dcache(conststruct qstr *name, struct dentry *dir, unsignedint flags)
{ struct dentry *dentry = d_lookup(dir, name); if (dentry) { int error = d_revalidate(dir->d_inode, name, dentry, flags); if (unlikely(error <= 0)) { if (!error)
d_invalidate(dentry);
dput(dentry); return ERR_PTR(error);
}
} return dentry;
}
/*
 * Parent directory has inode locked exclusive.  This is one
 * and only case when ->lookup() gets called on non in-lookup
 * dentries - as the matter of fact, this only gets called
 * when directory is guaranteed to have no in-lookup children
 * at all.
 * Will return -ENOENT if name isn't found and LOOKUP_CREATE wasn't passed.
 * Will return -EEXIST if name is found and LOOKUP_EXCL was passed.
 *
 * On success returns a referenced dentry (positive or negative,
 * subject to the LOOKUP_CREATE/LOOKUP_EXCL checks above).
 */
struct dentry *lookup_one_qstr_excl(const struct qstr *name,
				    struct dentry *base, unsigned int flags)
{
	struct dentry *dentry;
	struct dentry *old;
	struct inode *dir;

	/* dcache first - may already have a (revalidated) answer */
	dentry = lookup_dcache(name, base, flags);
	if (dentry)
		goto found;

	/* Don't create child dentry for a dead directory. */
	dir = base->d_inode;
	if (unlikely(IS_DEADDIR(dir)))
		return ERR_PTR(-ENOENT);

	dentry = d_alloc(base, name);
	if (unlikely(!dentry))
		return ERR_PTR(-ENOMEM);

	/* ->lookup() may return a different dentry to use instead */
	old = dir->i_op->lookup(dir, dentry, flags);
	if (unlikely(old)) {
		dput(dentry);
		dentry = old;
	}
found:
	if (IS_ERR(dentry))
		return dentry;
	if (d_is_negative(dentry) && !(flags & LOOKUP_CREATE)) {
		dput(dentry);
		return ERR_PTR(-ENOENT);
	}
	if (d_is_positive(dentry) && (flags & LOOKUP_EXCL)) {
		dput(dentry);
		return ERR_PTR(-EEXIST);
	}
	return dentry;
}
EXPORT_SYMBOL(lookup_one_qstr_excl);
/**
 * lookup_fast - do fast lockless (but racy) lookup of a dentry
 * @nd: current nameidata
 *
 * Do a fast, but racy lookup in the dcache for the given dentry, and
 * revalidate it. Returns a valid dentry pointer or NULL if one wasn't
 * found. On error, an ERR_PTR will be returned.
 *
 * If this function returns a valid dentry and the walk is no longer
 * lazy, the dentry will carry a reference that must later be put. If
 * RCU mode is still in force, then this is not the case and the dentry
 * must be legitimized before use. If this returns NULL, then the walk
 * will no longer be in RCU mode.
 */
static struct dentry *lookup_fast(struct nameidata *nd)
{
	struct dentry *dentry, *parent = nd->path.dentry;
	int status = 1;

	/*
	 * Rename seqlock is not required here because in the off chance
	 * of a false negative due to a concurrent rename, the caller is
	 * going to fall back to non-racy lookup.
	 */
	if (nd->flags & LOOKUP_RCU) {
		dentry = __d_lookup_rcu(parent, &nd->last, &nd->next_seq);
		if (unlikely(!dentry)) {
			/* cache miss: drop out of RCU mode for the caller */
			if (!try_to_unlazy(nd))
				return ERR_PTR(-ECHILD);
			return NULL;
		}

		/*
		 * This sequence count validates that the parent had no
		 * changes while we did the lookup of the dentry above.
		 */
		if (read_seqcount_retry(&parent->d_seq, nd->seq))
			return ERR_PTR(-ECHILD);

		status = d_revalidate(nd->inode, &nd->last, dentry, nd->flags);
		if (likely(status > 0))
			return dentry;
		/* revalidation needs to block - legitimize and retry below */
		if (!try_to_unlazy_next(nd, dentry))
			return ERR_PTR(-ECHILD);
		if (status == -ECHILD)
			/* we'd been told to redo it in non-rcu mode */
			status = d_revalidate(nd->inode, &nd->last,
					      dentry, nd->flags);
	} else {
		dentry = __d_lookup(parent, &nd->last);
		if (unlikely(!dentry))
			return NULL;
		status = d_revalidate(nd->inode, &nd->last, dentry, nd->flags);
	}
	if (unlikely(status <= 0)) {
		/* status == 0: stale dentry; status < 0: hard error */
		if (!status)
			d_invalidate(dentry);
		dput(dentry);
		return ERR_PTR(status);
	}
	return dentry;
}
/*
 * Fast lookup failed, do it the slow way: allocate (or join) an
 * in-lookup dentry and call the filesystem's ->lookup().  If we raced
 * with another lookup and got an already-instantiated dentry that then
 * fails revalidation, invalidate it and retry from scratch.
 */
static struct dentry *__lookup_slow(const struct qstr *name,
				    struct dentry *dir,
				    unsigned int flags)
{
	struct dentry *dentry, *old;
	struct inode *inode = dir->d_inode;
	DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);

	/* Don't go there if it's already dead */
	if (unlikely(IS_DEADDIR(inode)))
		return ERR_PTR(-ENOENT);
again:
	dentry = d_alloc_parallel(dir, name, &wq);
	if (IS_ERR(dentry))
		return dentry;
	if (unlikely(!d_in_lookup(dentry))) {
		/* somebody else finished the lookup first - revalidate theirs */
		int error = d_revalidate(inode, name, dentry, flags);
		if (unlikely(error <= 0)) {
			if (!error) {
				d_invalidate(dentry);
				dput(dentry);
				goto again;
			}
			dput(dentry);
			dentry = ERR_PTR(error);
		}
	} else {
		/* we own the in-lookup dentry - ask the filesystem */
		old = inode->i_op->lookup(inode, dentry, flags);
		d_lookup_done(dentry);
		if (unlikely(old)) {
			/* ->lookup() substituted its own dentry */
			dput(dentry);
			dentry = old;
		}
	}
	return dentry;
}
if (likely(nd->depth != EMBEDDED_LEVELS)) return 0; if (likely(nd->stack != nd->internal)) return 0; if (likely(nd_alloc_stack(nd))) return 0;
if (nd->flags & LOOKUP_RCU) { // we need to grab link before we do unlazy. And we can't skip // unlazy even if we fail to grab the link - cleanup needs it bool grabbed_link = legitimize_path(nd, link, nd->next_seq);
if (!try_to_unlazy(nd) || !grabbed_link) return -ECHILD;
if (nd_alloc_stack(nd)) return 0;
} return -ENOMEM;
}
res = READ_ONCE(inode->i_link); if (!res) { constchar * (*get)(struct dentry *, struct inode *, struct delayed_call *);
get = inode->i_op->get_link; if (nd->flags & LOOKUP_RCU) {
res = get(NULL, inode, &last->done); if (res == ERR_PTR(-ECHILD) && try_to_unlazy(nd))
res = get(link->dentry, inode, &last->done);
} else {
res = get(link->dentry, inode, &last->done);
} if (!res) goto all_done; if (IS_ERR(res)) return res;
} if (*res == '/') {
error = nd_jump_root(nd); if (unlikely(error)) return ERR_PTR(error); while (unlikely(*++res == '/'))
;
} if (*res) return res;
all_done: // pure jump
put_link(nd); return NULL;
}
/*
 * Do we need to follow links? We _really_ want to be able
 * to do this check without having to look at inode->i_op,
 * so we keep a cache of "no, this doesn't need follow_link"
 * for the common case.
 *
 * NOTE: dentry must be what nd->next_seq had been sampled from.
 *
 * Moves nd to the dentry (crossing mounts via handle_mounts()).
 * Returns NULL when we simply stepped into a non-symlink (or a
 * symlink we were told not to follow), the link body via pick_link()
 * when a symlink must be followed, or an ERR_PTR on failure.
 */
static const char *step_into(struct nameidata *nd, int flags,
		     struct dentry *dentry)
{
	struct path path;
	struct inode *inode;
	int err = handle_mounts(nd, dentry, &path);

	if (err < 0)
		return ERR_PTR(err);
	inode = path.dentry->d_inode;
	if (likely(!d_is_symlink(path.dentry)) ||
	   ((flags & WALK_TRAILING) && !(nd->flags & LOOKUP_FOLLOW)) ||
	   (flags & WALK_NOFOLLOW)) {
		/* not a symlink or should not follow */
		if (nd->flags & LOOKUP_RCU) {
			/* validate the ->d_inode read above against d_seq */
			if (read_seqcount_retry(&path.dentry->d_seq, nd->next_seq))
				return ERR_PTR(-ECHILD);
			if (unlikely(!inode))
				return ERR_PTR(-ENOENT);
		} else {
			/* drop references to the spot we're leaving */
			dput(nd->path.dentry);
			if (nd->path.mnt != path.mnt)
				mntput(nd->path.mnt);
		}
		nd->path = path;
		nd->inode = inode;
		nd->seq = nd->next_seq;
		return NULL;
	}
	if (nd->flags & LOOKUP_RCU) {
		/* make sure that d_is_symlink above matches inode */
		if (read_seqcount_retry(&path.dentry->d_seq, nd->next_seq))
			return ERR_PTR(-ECHILD);
	} else {
		/* handle_mounts() only grabbed a new mnt ref on crossing */
		if (path.mnt == nd->path.mnt)
			mntget(path.mnt);
	}
	return pick_link(nd, &path, inode, flags);
}
if (path_equal(&nd->path, &nd->root)) goto in_root; if (unlikely(nd->path.dentry == nd->path.mnt->mnt_root)) { struct path path; unsigned seq; if (!choose_mountpoint_rcu(real_mount(nd->path.mnt),
&nd->root, &path, &seq)) goto in_root; if (unlikely(nd->flags & LOOKUP_NO_XDEV)) return ERR_PTR(-ECHILD);
nd->path = path;
nd->inode = path.dentry->d_inode;
nd->seq = seq; // makes sure that non-RCU pathwalk could reach this state if (read_seqretry(&mount_lock, nd->m_seq)) return ERR_PTR(-ECHILD); /* we know that mountpoint was pinned */
}
old = nd->path.dentry;
parent = old->d_parent;
nd->next_seq = read_seqcount_begin(&parent->d_seq); // makes sure that non-RCU pathwalk could reach this state if (read_seqcount_retry(&old->d_seq, nd->seq)) return ERR_PTR(-ECHILD); if (unlikely(!path_connected(nd->path.mnt, parent))) return ERR_PTR(-ECHILD); return parent;
in_root: if (read_seqretry(&mount_lock, nd->m_seq)) return ERR_PTR(-ECHILD); if (unlikely(nd->flags & LOOKUP_BENEATH)) return ERR_PTR(-ECHILD);
nd->next_seq = nd->seq; return nd->path.dentry;
}
if (path_equal(&nd->path, &nd->root)) goto in_root; if (unlikely(nd->path.dentry == nd->path.mnt->mnt_root)) { struct path path;
if (!choose_mountpoint(real_mount(nd->path.mnt),
&nd->root, &path)) goto in_root;
path_put(&nd->path);
nd->path = path;
nd->inode = path.dentry->d_inode; if (unlikely(nd->flags & LOOKUP_NO_XDEV)) return ERR_PTR(-EXDEV);
} /* rare case of legitimate dget_parent()... */
parent = dget_parent(nd->path.dentry); if (unlikely(!path_connected(nd->path.mnt, parent))) {
dput(parent); return ERR_PTR(-ENOENT);
} return parent;
in_root: if (unlikely(nd->flags & LOOKUP_BENEATH)) return ERR_PTR(-EXDEV); return dget(nd->path.dentry);
}
/*
 * Handle the "." and ".." path components.  "." is a no-op; for ".."
 * we walk to the parent (possibly crossing mount boundaries) via
 * follow_dotdot{,_rcu}() and step onto it.  Returns NULL on success
 * or an ERR_PTR-encoded error.
 */
static const char *handle_dots(struct nameidata *nd, int type)
{
	if (type == LAST_DOTDOT) {
		const char *error = NULL;
		struct dentry *parent;

		/* ".." is relative to the root - make sure we have one */
		if (!nd->root.mnt) {
			error = ERR_PTR(set_root(nd));
			if (error)
				return error;
		}
		if (nd->flags & LOOKUP_RCU)
			parent = follow_dotdot_rcu(nd);
		else
			parent = follow_dotdot(nd);
		if (IS_ERR(parent))
			return ERR_CAST(parent);
		/* ".." is never a symlink - no following here */
		error = step_into(nd, WALK_NOFOLLOW, parent);
		if (unlikely(error))
			return error;

		if (unlikely(nd->flags & LOOKUP_IS_SCOPED)) {
			/*
			 * If there was a racing rename or mount along our
			 * path, then we can't be sure that ".." hasn't jumped
			 * above nd->root (and so userspace should retry or use
			 * some fallback).
			 */
			smp_rmb();
			if (__read_seqcount_retry(&mount_lock.seqcount, nd->m_seq))
				return ERR_PTR(-EAGAIN);
			if (__read_seqcount_retry(&rename_lock.seqcount, nd->r_seq))
				return ERR_PTR(-EAGAIN);
		}
	}
	return NULL;
}
/*
 * Walk one component of the path: dispatch "." / ".." to handle_dots(),
 * otherwise look the name up (fast path first, then the slow one) and
 * step onto the result.  Unless WALK_MORE is set, any pending symlink
 * body is released before moving on.
 */
static const char *walk_component(struct nameidata *nd, int flags)
{
	struct dentry *found;

	/*
	 * "." and ".." are special - ".." especially so because it has
	 * to be able to know about the current root directory and
	 * parent relationships.
	 */
	if (unlikely(nd->last_type != LAST_NORM)) {
		if (!(flags & WALK_MORE) && nd->depth)
			put_link(nd);
		return handle_dots(nd, nd->last_type);
	}

	found = lookup_fast(nd);
	if (unlikely(!found) && !IS_ERR(found))
		found = lookup_slow(&nd->last, nd->path.dentry, nd->flags);
	if (IS_ERR(found))
		return ERR_CAST(found);

	if (!(flags & WALK_MORE) && nd->depth)
		put_link(nd);
	return step_into(nd, flags, found);
}
/* * We can do the critical dentry name comparison and hashing * operations one word at a time, but we are limited to: * * - Architectures with fast unaligned word accesses. We could * do a "get_unaligned()" if this helps and is sufficiently * fast. * * - non-CONFIG_DEBUG_PAGEALLOC configurations (so that we * do not trap on the (extremely unlikely) case of a page * crossing operation. * * - Furthermore, we need an efficient 64-bit compile for the * 64-bit case in order to generate the "number of bytes in * the final mask". Again, that could be replaced with a * efficient population count instruction or similar.
*/ #ifdef CONFIG_DCACHE_WORD_ACCESS
#include <asm/word-at-a-time.h>
#ifdef HASH_MIX
/* Architecture provides HASH_MIX and fold_hash() in <asm/hash.h> */
#elif defined(CONFIG_64BIT)
/*
 * Register pressure in the mixing function is an issue, particularly
 * on 32-bit x86, but almost any function requires one state value and
 * one temporary.  Instead, use a function designed for two state values
 * and no temporaries.
 *
 * This function cannot create a collision in only two iterations, so
 * we have two iterations to achieve avalanche.  In those two iterations,
 * we have six layers of mixing, which is enough to spread one bit's
 * influence out to 2^6 = 64 state bits.
 *
 * Rotate constants are scored by considering either 64 one-bit input
 * deltas or 64*63/2 = 2016 two-bit input deltas, and finding the
 * probability of that delta causing a change to each of the 128 output
 * bits, using a sample of random initial states.
 *
 * The Shannon entropy of the computed probabilities is then summed
 * to produce a score.  Ideally, any input change has a 50% chance of
 * toggling any given output bit.
 *
 * Mixing scores (in bits) for (12,45):
 * Input delta: 1-bit      2-bit
 * 1 round:     713.3    42542.6
 * 2 rounds:   2753.7   140389.8
 * 3 rounds:   5954.1   233458.2
 * 4 rounds:   7862.6   256672.2
 * Perfect:    8192     258048
 *            (64*128) (64*63/2 * 128)
 */
/* Mix word @a into the running (x, y) hash state, 64-bit variant. */
#define HASH_MIX(x, y, a)	\
	(	x ^= (a),	\
		y ^= x,	x = rol64(x,12),\
		x += y,	y = rol64(y,45),\
		y *= 9			)
/*
 * Fold two longs into one 32-bit hash value.  This must be fast, but
 * latency isn't quite as critical, as there is a fair bit of additional
 * work done before the hash value is used.
 */
static inline unsigned int fold_hash(unsigned long x, unsigned long y)
{
	/* multiplicative mixing via the 64-bit golden-ratio constant;
	 * the top 32 bits carry the best-mixed bits */
	y ^= x * GOLDEN_RATIO_64;
	y *= GOLDEN_RATIO_64;
	return y >> 32;
}
#else	/* 32-bit case */

/*
 * Mixing scores (in bits) for (7,20):
 * Input delta: 1-bit     2-bit
 * 1 round:     330.3    9201.6
 * 2 rounds:   1246.4   25475.4
 * 3 rounds:   1907.1   31295.1
 * 4 rounds:   2042.3   31718.6
 * Perfect:    2048     31744
 *            (32*64) (32*31/2 * 64)
 */
/* Mix word @a into the running (x, y) hash state, 32-bit variant. */
#define HASH_MIX(x, y, a)	\
	(	x ^= (a),	\
		y ^= x,	x = rol32(x, 7),\
		x += y,	y = rol32(y,20),\
		y *= 9			)
/* Fold two 32-bit state words into the final hash value. */
static inline unsigned int fold_hash(unsigned long x, unsigned long y)
{
	/* Use arch-optimized multiply if one exists */
	return __hash_32(y ^ __hash_32(x));
}
#endif
/*
 * Return the hash of a string of known length.  This is carefully
 * designed to match hash_name(), which is the more critical function.
 * In particular, we must end by hashing a final word containing 0..7
 * payload bytes, to match the way that hash_name() iterates until it
 * finds the delimiter after the name.
 */
unsigned int full_name_hash(const void *salt, const char *name, unsigned int len)
{
	unsigned long a, x = 0, y = (unsigned long)salt;

	for (;;) {
		if (!len)
			goto done;
		/* may read past the end of the buffer, zero-padded safely */
		a = load_unaligned_zeropad(name);
		if (len < sizeof(unsigned long))
			break;
		HASH_MIX(x, y, a);
		name += sizeof(unsigned long);
		len -= sizeof(unsigned long);
	}
	/* final partial word: mask off bytes beyond len before mixing */
	x ^= a & bytemask_from_count(len);
done:
	return fold_hash(x, y);
}
EXPORT_SYMBOL(full_name_hash);
/* Return the "hash_len" (hash and length) of a null-terminated string */
u64 hashlen_string(const void *salt, const char *name)
{
	unsigned long a = 0, x = 0, y = (unsigned long)salt;
	unsigned long adata, mask, len;
	const struct word_at_a_time constants = WORD_AT_A_TIME_CONSTANTS;

	len = 0;
	goto inside;	/* first word needs loading before the zero check */

	do {
		HASH_MIX(x, y, a);
		len += sizeof(unsigned long);
inside:
		a = load_unaligned_zeropad(name+len);
	} while (!has_zero(a, &adata, &constants));

	/* found the word containing the NUL - hash only its payload bytes */
	adata = prep_zero_mask(a, adata, &constants);
	mask = create_zero_mask(adata);
	x ^= a & zero_bytemask(mask);

	return hashlen_create(fold_hash(x, y), len + find_zero(mask));
}
EXPORT_SYMBOL(hashlen_string);
/* * Calculate the length and hash of the path component, and * return the length as the result.
*/ staticinlineconstchar *hash_name(struct nameidata *nd, constchar *name, unsignedlong *lastword)
{ unsignedlong a, b, x, y = (unsignedlong)nd->path.dentry; unsignedlong adata, bdata, mask, len; conststruct word_at_a_time constants = WORD_AT_A_TIME_CONSTANTS;
/* * The first iteration is special, because it can result in * '.' and '..' and has no mixing other than the final fold.
*/
a = load_unaligned_zeropad(name);
b = a ^ REPEAT_BYTE('/'); if (has_zero(a, &adata, &constants) | has_zero(b, &bdata, &constants)) {
adata = prep_zero_mask(a, adata, &constants);
bdata = prep_zero_mask(b, bdata, &constants);
mask = create_zero_mask(adata | bdata);
a &= zero_bytemask(mask);
*lastword = a;
len = find_zero(mask);
nd->last.hash = fold_hash(a, y);
nd->last.len = len; return name + len;
}
len = 0;
x = 0; do {
HASH_MIX(x, y, a);
len += sizeof(unsignedlong);
a = load_unaligned_zeropad(name+len);
b = a ^ REPEAT_BYTE('/');
} while (!(has_zero(a, &adata, &constants) | has_zero(b, &bdata, &constants)));
adata = prep_zero_mask(a, adata, &constants);
bdata = prep_zero_mask(b, bdata, &constants);
mask = create_zero_mask(adata | bdata);
a &= zero_bytemask(mask);
x ^= a;
len += find_zero(mask);
*lastword = 0; // Multi-word components cannot be DOT or DOTDOT
/* * Note that the 'last' word is always zero-masked, but * was loaded as a possibly big-endian word.
*/ #ifdef __BIG_ENDIAN #define LAST_WORD_IS_DOT (0x2eul << (BITS_PER_LONG-8)) #define LAST_WORD_IS_DOTDOT (0x2e2eul << (BITS_PER_LONG-16)) #endif
#else/* !CONFIG_DCACHE_WORD_ACCESS: Slow, byte-at-a-time version */
/* Return the hash of a string of known length */
--> --------------------
--> maximum size reached
--> --------------------
¤ Dauer der Verarbeitung: 0.88 Sekunden
(vorverarbeitet)
¤
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereit gestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung ist noch experimentell.