/* Do not merge name events without dir fh */ if (!info1->dir_fh_totlen) returnfalse;
if (!fanotify_fsid_equal(&fne1->fsid, &fne2->fsid)) returnfalse;
return fanotify_info_equal(info1, info2);
}
/*
 * Compare two filesystem-error events to decide whether they may be merged.
 * Two events whose fsid differ are reported as not equal (false).
 */
staticbool fanotify_error_event_equal(struct fanotify_error_event *fee1, struct fanotify_error_event *fee2)
{ /* Error events against the same file system are always merged. */ if (!fanotify_fsid_equal(&fee1->fsid, &fee2->fsid)) returnfalse;
/*
 * NOTE(review): no return for the matching-fsid case and no closing brace
 * are visible at this point. The statements that follow refer to `old` and
 * `new`, which are not parameters of this function, so they presumably
 * belong to a separate merge predicate (fanotify_merge() calls
 * fanotify_should_merge(old, new)). Verify against the upstream file.
 */
/* * We want to merge many dirent events in the same dir (i.e. * creates/unlinks/renames), but we do not want to merge dirent * events referring to subdirs with dirent events referring to * non subdirs, otherwise, user won't be able to tell from a * mask FAN_CREATE|FAN_DELETE|FAN_ONDIR if it describes mkdir+ * unlink pair or rmdir+create pair of events.
*/ if ((old->mask & FS_ISDIR) != (new->mask & FS_ISDIR)) returnfalse;
/* * FAN_RENAME event is reported with special info record types, * so we cannot merge it with other events.
*/ if ((old->mask & FAN_RENAME) != (new->mask & FAN_RENAME)) returnfalse;
switch (old->type) { case FANOTIFY_EVENT_TYPE_PATH: return fanotify_path_equal(fanotify_event_path(old),
fanotify_event_path(new)); case FANOTIFY_EVENT_TYPE_FID: return fanotify_fid_event_equal(FANOTIFY_FE(old),
FANOTIFY_FE(new)); case FANOTIFY_EVENT_TYPE_FID_NAME: return fanotify_name_event_equal(FANOTIFY_NE(old),
FANOTIFY_NE(new)); case FANOTIFY_EVENT_TYPE_FS_ERROR: return fanotify_error_event_equal(FANOTIFY_EE(old),
FANOTIFY_EE(new)); case FANOTIFY_EVENT_TYPE_MNT: returnfalse; default:
WARN_ON_ONCE(1);
}
returnfalse;
}
/*
 * Limit event merges to limit CPU overhead per event: fanotify_merge()
 * stops scanning a merge hash bucket once it has looked at this many
 * queued events.
 */ #define FANOTIFY_MAX_MERGE_EVENTS 128
/*
 * Try to fold a new event into an event that is already queued in the same
 * merge hash bucket.
 *
 * @group: group whose fanotify_data.merge_hash is searched
 * @event: the new event; converted with FANOTIFY_E() to a fanotify event
 *
 * Returns 1 if the new event was merged into a queued event (the queued
 * event's mask absorbs the new event's mask), 0 if no merge took place.
 *
 * And the list better be locked by something too!
 */
static int fanotify_merge(struct fsnotify_group *group,
			  struct fsnotify_event *event)
{
	struct fanotify_event *old, *new = FANOTIFY_E(event);
	unsigned int bucket = fanotify_event_hash_bucket(group, new);
	struct hlist_head *hlist = &group->fanotify_data.merge_hash[bucket];
	int i = 0;

	/*
	 * Don't merge a permission event with any other event so that we know
	 * the event structure we have created in fanotify_handle_event() is the
	 * one we should check for permission response.
	 */
	if (fanotify_is_perm_event(new->mask))
		return 0;

	hlist_for_each_entry(old, hlist, merge_list) {
		/* Give up after a bounded number of candidates */
		if (++i > FANOTIFY_MAX_MERGE_EVENTS)
			break;
		if (fanotify_should_merge(old, new)) {
			old->mask |= new->mask;

			/* Each error event folded in bumps the error counter */
			if (fanotify_is_error_event(old->mask))
				FANOTIFY_EE(old)->err_count++;

			return 1;
		}
	}

	return 0;
}
/* * Wait for response to permission event. The function also takes care of * freeing the permission event (or offloads that in case the wait is canceled * by a signal). The function returns 0 in case access got allowed by userspace, * -EPERM in case userspace disallowed the access, and -ERESTARTSYS in case * the wait got interrupted by a signal.
*/ staticint fanotify_get_response(struct fsnotify_group *group, struct fanotify_perm_event *event, struct fsnotify_iter_info *iter_info)
{ int ret, errno;
ret = wait_event_state(group->fanotify_data.access_waitq,
event->state == FAN_EVENT_ANSWERED,
(TASK_KILLABLE|TASK_FREEZABLE));
/* Signal pending? */ if (ret < 0) {
spin_lock(&group->notification_lock); /* Event reported to userspace and no answer yet? */ if (event->state == FAN_EVENT_REPORTED) { /* Event will get freed once userspace answers to it */
event->state = FAN_EVENT_CANCELED;
spin_unlock(&group->notification_lock); return ret;
} /* Event not yet reported? Just remove it. */ if (event->state == FAN_EVENT_INIT) {
fsnotify_remove_queued_event(group, &event->fae.fse); /* Permission events are not supposed to be hashed */
WARN_ON_ONCE(!hlist_unhashed(&event->fae.merge_list));
} /* * Event may be also answered in case signal delivery raced * with wakeup. In that case we have nothing to do besides * freeing the event and reporting error.
*/
spin_unlock(&group->notification_lock); goto out;
}
/* userspace responded, convert to something usable */ switch (event->response & FANOTIFY_RESPONSE_ACCESS) { case FAN_ALLOW:
ret = 0; break; case FAN_DENY: /* Check custom errno from pre-content events */
errno = fanotify_get_response_errno(event->response); if (errno) {
ret = -errno; break;
}
fallthrough; default:
ret = -EPERM;
}
/* Check if the response should be audited */ if (event->response & FAN_AUDIT) {
u32 response = event->response &
(FANOTIFY_RESPONSE_ACCESS | FANOTIFY_RESPONSE_FLAGS);
audit_fanotify(response & ~FAN_AUDIT, &event->audit_rule);
}
pr_debug("%s: group=%p event=%p about to return ret=%d\n", __func__,
group, event, ret);
out:
fsnotify_destroy_event(group, &event->fae.fse);
return ret;
}
/*
 * This function returns a mask for an event that only contains the flags
 * that have been specifically requested by the user. Flags that may have
 * been included within the event mask, but have not been explicitly
 * requested by the user, will not be present in the returned mask.
 *
 * A return value of 0 means the event is not reported to this group.
 *
 * NOTE(review): in the text as it stands, test_mask is read below without
 * any visible assignment, and marks_mask, marks_ignore_mask, mark, type and
 * match_mask are never used. The step that derives test_mask from the marks
 * appears to be missing here -- verify against the upstream file.
 */ static u32 fanotify_group_event_mask(struct fsnotify_group *group, struct fsnotify_iter_info *iter_info,
u32 *match_mask, u32 event_mask, constvoid *data, int data_type, struct inode *dir)
{
__u32 marks_mask = 0, marks_ignore_mask = 0;
__u32 test_mask, user_mask = FANOTIFY_OUTGOING_EVENTS |
FANOTIFY_EVENT_FLAGS; conststruct path *path = fsnotify_data_path(data, data_type); unsignedint fid_mode = FAN_GROUP_FLAG(group, FANOTIFY_FID_BITS); struct fsnotify_mark *mark; bool ondir = event_mask & FAN_ONDIR; int type;
/*
 * Early filtering by group mode: with FAN_REPORT_MNT set, only
 * FSNOTIFY_EVENT_MNT data is accepted; otherwise the checks depend on
 * whether any fid reporting bit is set for the group.
 */
if (FAN_GROUP_FLAG(group, FAN_REPORT_MNT)) { if (data_type != FSNOTIFY_EVENT_MNT) return 0;
} elseif (!fid_mode) { /* Do we have path to open a file descriptor? */ if (!path) return 0; /* Path type events are only relevant for files and dirs */ if (!d_is_reg(path->dentry) && !d_can_lookup(path->dentry)) return 0;
} elseif (!(fid_mode & FAN_REPORT_FID)) { /* Do we have a directory inode to report? */ if (!dir && !ondir) return 0;
}
/*
 * For dirent modification events (create/delete/move) that do not carry
 * the child entry name information, we report FAN_ONDIR for mkdir/rmdir
 * so user can differentiate them from creat/unlink.
 *
 * For backward compatibility and consistency, do not report FAN_ONDIR
 * to user in legacy fanotify mode (reporting fd) and report FAN_ONDIR
 * to user in fid mode for all event types.
 *
 * We never report FAN_EVENT_ON_CHILD to user, but we do pass it in to
 * fanotify_alloc_event() when group is reporting fid as indication
 * that event happened on child.
 */ if (fid_mode) { /* Do not report event flags without any event */ if (!(test_mask & ~FANOTIFY_EVENT_FLAGS)) return 0;
} else {
/* Not in fid mode: strip the event flags from what the user gets */
user_mask &= ~FANOTIFY_EVENT_FLAGS;
}
return test_mask & user_mask;
}
/* * Check size needed to encode fanotify_fh. * * Return size of encoded fh without fanotify_fh header. * Return 0 on failure to encode.
*/ staticint fanotify_encode_fh_len(struct inode *inode)
{ int dwords = 0; int fh_len;
/* * struct fanotify_error_event might be preallocated and is * limited to MAX_HANDLE_SZ. This should never happen, but * safeguard by forcing an invalid file handle.
*/ if (WARN_ON_ONCE(fh_len > MAX_HANDLE_SZ)) return 0;
return fh_len;
}
/*
 * Encode fanotify_fh.
 *
 * @fh: handle header to fill in; the inline buffer starts right after it
 * @inode: inode to encode; may be NULL
 * @fh_len: expected encoded length in bytes
 * @hash: event merge key to mix the handle into; may be NULL
 * @gfp: allocation flags for an external buffer; zero means none is allocated
 *
 * Return total size of encoded fh including fanotify_fh header.
 * Return 0 on failure to encode; in that case fh is marked FILEID_INVALID
 * with zero length and a rate-limited warning is logged.
 */ staticint fanotify_encode_fh(struct fanotify_fh *fh, struct inode *inode, unsignedint fh_len, unsignedint *hash,
gfp_t gfp)
{ int dwords, type = 0; char *ext_buf = NULL; void *buf = fh + 1; int err;
/*
 * Invalid FHs are used by FAN_FS_ERROR for errors not
 * linked to any inode. The f_handle won't be reported
 * back to userspace.
 *
 * NOTE(review): on this path fh->type and fh->len are not set anywhere
 * in the visible text before fh is hashed -- confirm whether an
 * initialization of fh is missing above.
 */ if (!inode) goto out;
/*
 * !gfp means preallocated variable size fh, but fh_len could
 * be zero in that case if encoding fh len failed.
 */
err = -ENOENT; if (fh_len < 4 || WARN_ON_ONCE(fh_len % 4) || fh_len > MAX_HANDLE_SZ) goto out_err;
/* No external buffer in a variable size allocated fh */ if (gfp && fh_len > FANOTIFY_INLINE_FH_LEN) { /* Treat failure to allocate fh as failure to encode fh */
err = -ENOMEM;
ext_buf = kmalloc(fh_len, gfp); if (!ext_buf) goto out_err;
/*
 * NOTE(review): the block opened by the `if (gfp && ...)` above has no
 * visible closing brace, and ext_buf is neither stored in fh nor used as
 * the encode buffer in the visible text (buf still points at the inline
 * area). Lines appear to be missing here -- verify against upstream.
 */
/* Length limit is passed to the encoder in 4-byte words */
dwords = fh_len >> 2;
type = exportfs_encode_fid(inode, buf, &dwords);
err = -EINVAL; /*
 * Unlike file_handle, type and len of struct fanotify_fh are u8.
 * Traditionally, filesystems return handle_type < 0xff, but there
 * is no enforcement for that in vfs.
 */
BUILD_BUG_ON(MAX_HANDLE_SZ > 0xff || FILEID_INVALID > 0xff); if (type <= 0 || type >= FILEID_INVALID || fh_len != dwords << 2) goto out_err;
fh->type = type;
fh->len = fh_len;
out: /*
 * Mix fh into event merge key. Hash might be NULL in case of
 * unhashed FID events (i.e. FAN_FS_ERROR).
 */ if (hash)
*hash ^= fanotify_hash_fh(fh);
return FANOTIFY_FH_HDR_LEN + fh_len;
out_err:
pr_warn_ratelimited("fanotify: failed to encode fid (type=%d, len=%d, err=%i)\n",
type, fh_len, err);
/* ext_buf is NULL unless the allocation above succeeded */
kfree(ext_buf);
*fanotify_fh_ext_buf_ptr(fh) = NULL; /* Report the event without a file identifier on encode error */
fh->type = FILEID_INVALID;
fh->len = 0; return 0;
}
/*
 * FAN_REPORT_FID is ambiguous in that it reports the fid of the child for
 * some events and the fid of the parent for create/delete/move events.
 *
 * With the FAN_REPORT_TARGET_FID flag, the fid of the child is reported
 * also in create/delete/move events in addition to the fid of the parent
 * and the name of the child.
 *
 * For a mask containing any of ALL_FSNOTIFY_DIRENT_EVENTS, the result is
 * whether fid_mode has FAN_REPORT_TARGET_FID set.
 */ staticinlinebool fanotify_report_child_fid(unsignedint fid_mode, u32 mask)
{ if (mask & ALL_FSNOTIFY_DIRENT_EVENTS) return (fid_mode & FAN_REPORT_TARGET_FID);
/*
 * NOTE(review): no return for the non-dirent case and no closing brace
 * are visible for this function; the remainder appears to be missing
 * here -- verify against the upstream file.
 */
/* * The inode to use as identifier when reporting fid depends on the event * and the group flags. * * With the group flag FAN_REPORT_TARGET_FID, always report the child fid. * * Without the group flag FAN_REPORT_TARGET_FID, report the modified directory * fid on dirent events and the child fid otherwise. * * For example: * FS_ATTRIB reports the child fid even if reported on a watched parent. * FS_CREATE reports the modified dir fid without FAN_REPORT_TARGET_FID. * and reports the created child fid with FAN_REPORT_TARGET_FID.
*/ staticstruct inode *fanotify_fid_inode(u32 event_mask, constvoid *data, int data_type, struct inode *dir, unsignedint fid_mode)
{ if ((event_mask & ALL_FSNOTIFY_DIRENT_EVENTS) &&
!(fid_mode & FAN_REPORT_TARGET_FID)) return dir;
return fsnotify_data_inode(data, data_type);
}
/*
 * The inode to use as identifier when reporting dir fid depends on the event.
 * Report the modified directory inode on dirent modification events.
 * Report the "victim" inode if "victim" is a directory.
 * Report the parent inode if "victim" is not a directory and event is
 * reported to parent.
 * Otherwise, do not report dir fid.
 */ staticstruct inode *fanotify_dfid_inode(u32 event_mask, constvoid *data, int data_type, struct inode *dir)
{ struct inode *inode = fsnotify_data_inode(data, data_type);
/* Dirent modification event: the modified directory is the identifier */
if (event_mask & ALL_FSNOTIFY_DIRENT_EVENTS) return dir;
/* The event's own inode is a directory: report that inode */
if (inode && S_ISDIR(inode->i_mode)) return inode;
/*
 * NOTE(review): no final return (the non-directory case) and no closing
 * brace are visible for this function. The statements that follow use
 * identifiers such as fid_mode, dirid, child and mask that are not declared
 * here and presumably belong to a different function -- verify against
 * the upstream file.
 */
if ((fid_mode & FAN_REPORT_DIR_FID) && dirid) { /* * For certain events and group flags, report the child fid * in addition to reporting the parent fid and maybe child name.
*/ if (fanotify_report_child_fid(fid_mode, mask) && id != dirid)
child = id;
id = dirid;
/* * We record file name only in a group with FAN_REPORT_NAME * and when we have a directory inode to report. * * For directory entry modification event, we record the fid of * the directory and the name of the modified entry. * * For event on non-directory that is reported to parent, we * record the fid of the parent and the name of the child. * * Even if not reporting name, we need a variable length * fanotify_name_event if reporting both parent and child fids.
*/ if (!(fid_mode & FAN_REPORT_NAME)) {
name_event = !!child;
file_name = NULL;
} elseif ((mask & ALL_FSNOTIFY_DIRENT_EVENTS) || !ondir) {
name_event = true;
}
/* * In the special case of FAN_RENAME event, use the match_mask * to determine if we need to report only the old parent+name, * only the new parent+name or both. * 'dirid' and 'file_name' are the old parent+name and * 'moved' has the new parent+name.
*/ if (mask & FAN_RENAME) { bool report_old, report_new;
if (WARN_ON_ONCE(!match_mask)) return NULL;
/* Report both old and new parent+name if sb watching */
report_old = report_new =
match_mask & (1U << FSNOTIFY_ITER_TYPE_SB);
report_old |=
match_mask & (1U << FSNOTIFY_ITER_TYPE_INODE);
report_new |=
match_mask & (1U << FSNOTIFY_ITER_TYPE_INODE2);
if (!report_old) { /* Do not report old parent+name */
dirid = NULL;
file_name = NULL;
} if (report_new) { /* Report new parent+name */
moved = fsnotify_data_dentry(data, data_type);
}
}
}
/* * For queues with unlimited length lost events are not expected and * can possibly have security implications. Avoid losing events when * memory is short. For the limited size queues, avoid OOM killer in the * target monitoring memcg as it may have security repercussion.
*/ if (group->max_events == UINT_MAX)
gfp |= __GFP_NOFAIL; else
gfp |= __GFP_RETRY_MAYFAIL;
/* Whoever is interested in the event, pays for the allocation. */
old_memcg = set_active_memcg(group->memcg);
if (FAN_GROUP_FLAG(group, FAN_REPORT_TID))
pid = get_pid(task_pid(current)); else
pid = get_pid(task_tgid(current));
/* Mix event info, FAN_ONDIR flag and pid into event merge key */
hash ^= hash_long((unsignedlong)pid | ondir, FANOTIFY_EVENT_HASH_BITS);
fanotify_init_event(event, hash, mask);
event->pid = pid;
out:
set_active_memcg(old_memcg); return event;
}
/*
 * Get cached fsid of the filesystem containing the object from any mark.
 * All marks are supposed to have the same fsid, but we do not verify that here.
 */ static __kernel_fsid_t fanotify_get_fsid(struct fsnotify_iter_info *iter_info)
{ struct fsnotify_mark *mark; int type;
/* Starts out zeroed */
__kernel_fsid_t fsid = {};
/*
 * NOTE(review): only the local declarations are visible; there is no
 * visible walk over the marks, no return statement and no closing brace.
 * The statements that follow use identifiers such as mask, group, event
 * and ret that are not declared here and presumably belong to a different
 * function -- verify against the upstream file.
 */
if (fanotify_is_perm_event(mask)) { /* * fsnotify_prepare_user_wait() fails if we race with mark * deletion. Just let the operation pass in that case.
*/ if (!fsnotify_prepare_user_wait(iter_info)) return 0;
}
if (FAN_GROUP_FLAG(group, FANOTIFY_FID_BITS))
fsid = fanotify_get_fsid(iter_info);
event = fanotify_alloc_event(group, mask, data, data_type, dir,
file_name, &fsid, match_mask);
ret = -ENOMEM; if (unlikely(!event)) { /* * We don't queue overflow events for permission events as * there the access is denied and so no event is in fact lost.
*/ if (!fanotify_is_perm_event(mask))
fsnotify_queue_overflow(group); goto finish;
}
fsn_event = &event->fse;
ret = fsnotify_insert_event(group, fsn_event, fanotify_merge,
fanotify_insert_event); if (ret) { /* Permission events shouldn't be merged */
BUG_ON(ret == 1 && mask & FANOTIFY_PERM_EVENTS); /* Our event wasn't used in the end. Free it. */
fsnotify_destroy_event(group, fsn_event);
ret = 0;
} elseif (fanotify_is_perm_event(mask)) {
ret = fanotify_get_response(group, FANOTIFY_PERM(event),
iter_info);
}
finish: if (fanotify_is_perm_event(mask))
fsnotify_finish_user_wait(iter_info);
/*
 * Die Informationen auf dieser Webseite wurden
 * nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
 * noch Qualität der bereitgestellten Informationen zugesichert.
 * Bemerkung:
 * Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.
 */