/* Dedupe requires both ranges to be within EOF. */ if ((remap_flags & REMAP_FILE_DEDUP) &&
(pos_in >= size_in || pos_in + count > size_in ||
pos_out >= size_out || pos_out + count > size_out)) return -EINVAL;
/* Ensure the infile range is within the infile. */ if (pos_in >= size_in) return -EINVAL;
count = min(count, size_in - (uint64_t)pos_in);
ret = generic_write_check_limits(file_out, pos_out, &count); if (ret) return ret;
/* * If the user wanted us to link to the infile's EOF, round up to the * next block boundary for this check. * * Otherwise, make sure the count is also block-aligned, having * already confirmed the starting offsets' block alignment.
*/ if (pos_in + count == size_in &&
(!(remap_flags & REMAP_FILE_DEDUP) || pos_out + count == size_out)) {
bcount = ALIGN(size_in, bs) - pos_in;
} else { if (!IS_ALIGNED(count, bs))
count = ALIGN_DOWN(count, bs);
bcount = count;
}
/* Don't allow overlapped cloning within the same file. */ if (inode_in == inode_out &&
pos_out + bcount > pos_in &&
pos_out < pos_in + bcount) return -EINVAL;
/* * We shortened the request but the caller can't deal with that, so * bounce the request back to userspace.
*/ if (*req_count != count && !(remap_flags & REMAP_FILE_CAN_SHORTEN)) return -EINVAL;
*req_count = count; return 0;
}
/*
 * Validate a file range for a remap (clone/dedupe) operation.
 *
 * Checks that @pos and @len are non-negative, that pos + len does not
 * overflow loff_t, and that the caller holds the matching read or write
 * permission on @file (per @write).
 *
 * Returns 0 on success or a negative errno.
 */
int remap_verify_area(struct file *file, loff_t pos, loff_t len, bool write)
{
	int mask = write ? MAY_WRITE : MAY_READ;
	loff_t tmp;
	int ret;

	if (unlikely(pos < 0 || len < 0))
		return -EINVAL;

	/* Reject ranges whose end would wrap past LLONG_MAX. */
	if (unlikely(check_add_overflow(pos, len, &tmp)))
		return -EINVAL;

	ret = security_file_permission(file, mask);
	if (ret)
		return ret;

	/*
	 * NOTE(review): the source text was truncated here; tail restored
	 * from upstream fs/remap_range.c — confirm against the tree's
	 * kernel version.
	 */
	return fsnotify_file_area_perm(file, mask, &pos, len);
}
/*
 * Ensure that we don't remap a partial EOF block in the middle of something
 * else.  Assume that the offsets have already been checked for block
 * alignment.
 *
 * For clone we only link a partial EOF block above or at the destination file's
 * EOF.  For deduplication we accept a partial EOF block only if it ends at the
 * destination file's EOF (can not link it into the middle of a file).
 *
 * Shorten the request if possible.
 */
static int generic_remap_check_len(struct inode *inode_in,
				   struct inode *inode_out,
				   loff_t pos_out,
				   loff_t *len,
				   unsigned int remap_flags)
{
	u64 blkmask = i_blocksize(inode_in) - 1;
	loff_t new_len = *len;

	/* Block-aligned length needs no further checking. */
	if ((*len & blkmask) == 0)
		return 0;

	/*
	 * The request ends in a partial block that would land in the middle
	 * of the destination file: trim it down to a block boundary.
	 */
	if (pos_out + *len < i_size_read(inode_out))
		new_len &= ~blkmask;

	/*
	 * NOTE(review): the remainder of this function was truncated in the
	 * source text; restored from upstream fs/remap_range.c — confirm
	 * against the tree's kernel version.
	 */
	if (new_len == *len)
		return 0;

	if (remap_flags & REMAP_FILE_CAN_SHORTEN) {
		*len = new_len;
		return 0;
	}

	return (remap_flags & REMAP_FILE_DEDUP) ? -EBADE : -EINVAL;
}
/* Read a page's worth of file data into the page cache. */ staticstruct folio *vfs_dedupe_get_folio(struct file *file, loff_t pos)
{ return read_mapping_folio(file->f_mapping, pos >> PAGE_SHIFT, file);
}
/*
 * Lock two folios, ensuring that we lock in offset order if the folios
 * are from the same file.
 */
static void vfs_lock_two_folios(struct folio *folio1, struct folio *folio2)
{
	/* Always lock in order of increasing index. */
	if (folio1->index > folio2->index)
		swap(folio1, folio2);

	folio_lock(folio1);
	/* Don't deadlock by locking the same folio twice. */
	if (folio1 != folio2)
		folio_lock(folio2);
}
/* Unlock two folios, being careful not to unlock the same folio twice. */
static void vfs_unlock_two_folios(struct folio *folio1, struct folio *folio2)
{
	folio_unlock(folio1);
	if (folio1 != folio2)
		folio_unlock(folio2);
}
/*
 * Compare extents of two files to see if they are the same.
 * Caller must have locked both inodes to prevent write races.
 *
 * On success returns 0 and sets *is_same; returns a negative errno on
 * failure to read or map the data.
 *
 * NOTE(review): a large interior span of this function (loop head, folio
 * fetch, kmap/memcmp, unlock label) was dropped by the text extraction;
 * restored from upstream fs/remap_range.c — confirm against the tree's
 * kernel version.
 */
static int vfs_dedupe_file_range_compare(struct file *src, loff_t srcoff,
					 struct file *dest, loff_t dstoff,
					 loff_t len, bool *is_same)
{
	bool same = true;
	int error = -EINVAL;

	while (len) {
		struct folio *src_folio, *dst_folio;
		void *src_addr, *dst_addr;
		/* Compare at most up to the end of either folio's page. */
		loff_t cmp_len = min(PAGE_SIZE - offset_in_page(srcoff),
				     PAGE_SIZE - offset_in_page(dstoff));

		cmp_len = min(cmp_len, len);
		if (cmp_len <= 0)
			goto out_error;

		src_folio = vfs_dedupe_get_folio(src, srcoff);
		if (IS_ERR(src_folio)) {
			error = PTR_ERR(src_folio);
			goto out_error;
		}
		dst_folio = vfs_dedupe_get_folio(dest, dstoff);
		if (IS_ERR(dst_folio)) {
			error = PTR_ERR(dst_folio);
			folio_put(src_folio);
			goto out_error;
		}

		vfs_lock_two_folios(src_folio, dst_folio);

		/*
		 * Now that we've locked both folios, make sure they're still
		 * mapped to the file data we're interested in.  If not,
		 * someone is invalidating pages on us and we lose.
		 */
		if (!folio_test_uptodate(src_folio) ||
		    !folio_test_uptodate(dst_folio) ||
		    src_folio->mapping != src->f_mapping ||
		    dst_folio->mapping != dest->f_mapping) {
			same = false;
			goto unlock;
		}

		src_addr = kmap_local_folio(src_folio,
					offset_in_folio(src_folio, srcoff));
		dst_addr = kmap_local_folio(dst_folio,
					offset_in_folio(dst_folio, dstoff));

		flush_dcache_folio(src_folio);
		flush_dcache_folio(dst_folio);

		if (memcmp(src_addr, dst_addr, cmp_len))
			same = false;

		kunmap_local(dst_addr);
		kunmap_local(src_addr);
unlock:
		vfs_unlock_two_folios(src_folio, dst_folio);
		folio_put(dst_folio);
		folio_put(src_folio);

		/* First mismatch ends the scan. */
		if (!same)
			break;

		srcoff += cmp_len;
		dstoff += cmp_len;
		len -= cmp_len;
	}

	*is_same = same;
	return 0;

out_error:
	return error;
}
/*
 * Check that the two inodes are eligible for cloning, the ranges make
 * sense, and then flush all dirty data.  Caller must ensure that the
 * inodes have been locked against any other modifications.
 *
 * If there's an error, then the usual negative error code is returned.
 * Otherwise returns 0 with *len set to the request length.
 */
int
__generic_remap_file_range_prep(struct file *file_in, loff_t pos_in,
				struct file *file_out, loff_t pos_out,
				loff_t *len, unsigned int remap_flags,
				const struct iomap_ops *dax_read_ops)
{
	struct inode *inode_in = file_inode(file_in);
	struct inode *inode_out = file_inode(file_out);
	bool same_inode = (inode_in == inode_out);
	int ret;

	/* Don't touch certain kinds of inodes */
	if (IS_IMMUTABLE(inode_out))
		return -EPERM;

	if (IS_SWAPFILE(inode_in) || IS_SWAPFILE(inode_out))
		return -ETXTBSY;

	/* Don't reflink dirs, pipes, sockets... */
	if (S_ISDIR(inode_in->i_mode) || S_ISDIR(inode_out->i_mode))
		return -EISDIR;
	if (!S_ISREG(inode_in->i_mode) || !S_ISREG(inode_out->i_mode))
		return -EINVAL;

	/* Zero length dedupe exits immediately; reflink goes to EOF. */
	if (*len == 0) {
		loff_t isize = i_size_read(inode_in);

		if ((remap_flags & REMAP_FILE_DEDUP) || pos_in == isize)
			return 0;
		if (pos_in > isize)
			return -EINVAL;
		*len = isize - pos_in;
		if (*len == 0)
			return 0;
	}

	/* Check that we don't violate system file offset limits. */
	ret = generic_remap_checks(file_in, pos_in, file_out, pos_out, len,
			remap_flags);
	if (ret || *len == 0)
		return ret;

	/* Wait for the completion of any pending IOs on both files */
	inode_dio_wait(inode_in);
	if (!same_inode)
		inode_dio_wait(inode_out);

	ret = filemap_write_and_wait_range(inode_in->i_mapping,
			pos_in, pos_in + *len - 1);
	if (ret)
		return ret;

	ret = filemap_write_and_wait_range(inode_out->i_mapping,
			pos_out, pos_out + *len - 1);
	if (ret)
		return ret;

	/*
	 * Check that the extents are the same.
	 */
	if (remap_flags & REMAP_FILE_DEDUP) {
		bool is_same = false;

		if (!IS_DAX(inode_in))
			ret = vfs_dedupe_file_range_compare(file_in, pos_in,
					file_out, pos_out, *len, &is_same);
		else if (dax_read_ops)
			ret = dax_dedupe_file_range_compare(inode_in, pos_in,
					inode_out, pos_out, *len, &is_same,
					dax_read_ops);
		else
			return -EINVAL;
		if (ret)
			return ret;
		if (!is_same)
			return -EBADE;
	}

	ret = generic_remap_check_len(inode_in, inode_out, pos_out, len,
			remap_flags);
	if (ret || *len == 0)
		return ret;

	/* If can't alter the file contents, we're done. */
	if (!(remap_flags & REMAP_FILE_DEDUP))
		ret = file_modified(file_out);

	/*
	 * NOTE(review): trailing "return ret;" was truncated in the source
	 * text; restored so the non-void function returns on all paths.
	 */
	return ret;
}
/* Check whether we are allowed to dedupe the destination file */ staticbool may_dedupe_file(struct file *file)
{ struct mnt_idmap *idmap = file_mnt_idmap(file); struct inode *inode = file_inode(file);
if (capable(CAP_SYS_ADMIN)) returntrue; if (file->f_mode & FMODE_WRITE) returntrue; if (vfsuid_eq_kuid(i_uid_into_vfsuid(idmap, inode), current_fsuid())) returntrue; if (!inode_permission(idmap, inode, MAY_WRITE)) returntrue; returnfalse;
}
/*
 * This is redundant if called from vfs_dedupe_file_range(), but other
 * callers need it and it's not performance sensitive...
 */
ret = remap_verify_area(src_file, src_pos, len, false); if (ret) return ret;
ret = remap_verify_area(dst_file, dst_pos, len, true); if (ret) return ret;
/* * This needs to be called after remap_verify_area() because of * sb_start_write() and before may_dedupe_file() because the mount's * MAY_WRITE need to be checked with mnt_get_write_access_file() held.
*/
ret = mnt_want_write_file(dst_file); if (ret) return ret;
ret = -EPERM; if (!may_dedupe_file(dst_file)) goto out_drop_write;
ret = -EXDEV; if (file_inode(src_file)->i_sb != file_inode(dst_file)->i_sb) goto out_drop_write;
ret = -EISDIR; if (S_ISDIR(file_inode(dst_file)->i_mode)) goto out_drop_write;
ret = -EINVAL; if (!dst_file->f_op->remap_file_range) goto out_drop_write;
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit noch Richtigkeit
noch Qualität der bereitgestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung ist noch experimentell.