/*
 * NOTE(review): fragment — starts mid-function, and appears to interleave
 * two separate routines (a per-device nocow write-flush loop and an inode
 * journal-seq fsck repair). Kept byte-identical; confirm against upstream.
 */
for_each_set_bit(dev, devs.d, BCH_SB_MEMBERS_MAX) {
scoped_guard(rcu) {
/*
 * Look up the device under RCU; clear ca if the device is gone or
 * we can't take a nocow-flush write ref on it.
 */
ca = rcu_dereference(c->devs[dev]); if (ca && !enumerated_ref_tryget(&ca->io_ref[WRITE],
BCH_DEV_WRITE_REF_nocow_flush))
ca = NULL;
}
/*
 * Repair: an inode's bi_journal_seq should never be ahead of the
 * journal's current sequence; if fsck confirms it is, clamp it to
 * cur_seq and rewrite the inode.
 */
if (fsck_err_on(u.bi_journal_seq > cur_seq,
trans, inode_journal_seq_in_future, "inode journal seq in future (currently at %llu)\n%s",
cur_seq,
(bch2_inode_unpacked_to_text(&buf, &u),
buf.buf))) {
u.bi_journal_seq = cur_seq;
ret = bch2_inode_write(trans, &iter, &u);
}
fsck_err:
/* Common exit: release the iterator and the printbuf used for messages */
bch2_trans_iter_exit(trans, &iter);
printbuf_exit(&buf); return ret;
}
/* * inode->ei_inode.bi_journal_seq won't be up to date since it's set in an * insert trigger: look up the btree inode instead
*/ staticint bch2_flush_inode(struct bch_fs *c, struct bch_inode_info *inode)
{ if (c->opts.journal_flush_disabled) return 0;
/* NOTE(review): "staticint" above is a concatenation artifact — should be
 * "static int". Body of bch2_flush_inode is truncated in this view; the
 * lines below jump straight into bch2_fsync without a closing brace. */
if (!enumerated_ref_tryget(&c->writes, BCH_WRITE_REF_fsync)) return -EROFS;
/*
 * fsync entry point: flush dirty pages, then inode metadata, then ask the
 * filesystem to flush the inode's journal entries to stable storage.
 */
int bch2_fsync(struct file *file, loff_t start, loff_t end, int datasync)
{ struct bch_inode_info *inode = file_bch_inode(file); struct bch_fs *c = inode->v.i_sb->s_fs_info; int ret, err;
trace_bch2_fsync(file, datasync);
ret = file_write_and_wait_range(file, start, end); if (ret) goto out;
ret = sync_inode_metadata(&inode->v, 1); if (ret) goto out;
ret = bch2_flush_inode(c, inode);
out:
/*
 * Map internal error codes to standard errnos; -EROFS here means we
 * couldn't take a write ref (fs going read-only), which userspace
 * expects to see as -EIO from fsync.
 */
ret = bch2_err_class(ret); if (ret == -EROFS)
ret = -EIO;
/* Also report any previously-recorded writeback error on this file */
err = file_check_and_advance_wb_err(file); if (!ret)
ret = err;
/*
 * NOTE(review): fragment — interior of a folio-truncate helper; mapping,
 * index, c, inode, s, end_pos and ret are declared above this view.
 */
folio = filemap_lock_folio(mapping, index); if (IS_ERR_OR_NULL(folio)) { /* * XXX: we're doing two index lookups when we end up reading the * folio
*/
/*
 * No folio cached: only bother instantiating one if the range
 * actually has data on disk (<= 0 means error or no data).
 */
ret = range_has_data(c, inode->ei_inum.subvol,
POS(inode->v.i_ino, (index << PAGE_SECTORS_SHIFT)),
POS(inode->v.i_ino, (index << PAGE_SECTORS_SHIFT) + PAGE_SECTORS)); if (ret <= 0) return ret;
folio = __filemap_get_folio(mapping, index,
FGP_LOCK|FGP_CREAT, GFP_KERNEL); if (IS_ERR(folio)) {
ret = -ENOMEM; goto out;
}
}
/* * Caller needs to know whether this folio will be written out by * writeback - doing an i_size update if necessary - or whether it will * be responsible for the i_size update. * * Note that we shouldn't ever see a folio beyond EOF, but check and * warn if so. This has been observed by failure to clean up folios * after a short write and there's still a chance reclaim will fix * things up.
*/
WARN_ON_ONCE(folio_pos(folio) >= inode->v.i_size);
end_pos = folio_end_pos(folio); if (inode->v.i_size > folio_pos(folio))
end_pos = min_t(u64, inode->v.i_size, end_pos);
/* Result: whether the last sector covered by this folio is at least dirty */
ret = s->s[folio_pos_to_s(folio, end_pos - 1)].state >= SECTOR_dirty;
/* * Bit of a hack - we don't want truncate to fail due to -ENOSPC. * * XXX: because we aren't currently tracking whether the folio has actual * data in it (vs. just 0s, or only partially written) this is wrong. ick.
*/
BUG_ON(bch2_get_folio_disk_reservation(c, inode, folio, false));
/* * This removes any writeable userspace mappings; we need to force * .page_mkwrite to be called again before any mmapped writes, to * redirty the full page:
*/
folio_mkclean(folio);
filemap_dirty_folio(mapping, folio);
unlock:
folio_unlock(folio);
folio_put(folio);
out: return ret;
}
/* * sync appends: * * this has to be done _before_ extending i_size:
*/
/*
 * NOTE(review): fragment — interior of the extend/truncate setattr path;
 * mapping, inode_u, iattr, idmap, c and i_sectors_delta come from outside
 * this view. Also note the "elseif" artifact at the second
 * filemap_write_and_wait_range below — should be "else if".
 */
ret = filemap_write_and_wait_range(mapping, inode_u->bi_size, S64_MAX); if (ret) return ret;
/* * If the truncate call will change the size of the file, the * cmtimes should be updated. If the size will not change, we * do not need to update the cmtimes.
*/ if (iattr->ia_size != inode->v.i_size) { if (!(iattr->ia_valid & ATTR_MTIME))
ktime_get_coarse_real_ts64(&iattr->ia_mtime); if (!(iattr->ia_valid & ATTR_CTIME))
ktime_get_coarse_real_ts64(&iattr->ia_ctime);
iattr->ia_valid |= ATTR_MTIME|ATTR_CTIME;
}
/* Read the authoritative btree copy of the inode before comparing sizes */
ret = bch2_inode_find_by_inum(c, inode_inum(inode), &inode_u); if (ret) goto err;
/* * check this before next assertion; on filesystem error our normal * invariants are a bit broken (truncate has to truncate the page cache * before the inode).
*/
ret = bch2_journal_error(&c->journal); if (ret) goto err;
WARN_ONCE(!test_bit(EI_INODE_ERROR, &inode->ei_flags) &&
inode->v.i_size < inode_u.bi_size, "truncate spotted in mem i_size < btree i_size: %llu < %llu\n",
(u64) inode->v.i_size, inode_u.bi_size);
/* Growing the file is handled by the extend path, not truncate */
if (iattr->ia_size > inode->v.i_size) {
ret = bch2_extend(idmap, inode, &inode_u, iattr); goto err;
}
iattr->ia_valid &= ~ATTR_SIZE;
ret = bch2_truncate_folio(inode, iattr->ia_size); if (unlikely(ret < 0)) goto err;
ret = 0;
truncate_setsize(&inode->v, iattr->ia_size);
/* * When extending, we're going to write the new i_size to disk * immediately so we need to flush anything above the current on disk * i_size first: * * Also, when extending we need to flush the page that i_size currently * straddles - if it's mapped to userspace, we need to ensure that * userspace has to redirty it and call .mkwrite -> set_page_dirty * again to allocate the part of the page that was extended.
*/ if (iattr->ia_size > inode_u.bi_size)
ret = filemap_write_and_wait_range(mapping,
inode_u.bi_size,
iattr->ia_size - 1); elseif (iattr->ia_size & (PAGE_SIZE - 1))
ret = filemap_write_and_wait_range(mapping,
round_down(iattr->ia_size, PAGE_SIZE),
iattr->ia_size - 1); if (ret) goto err;
ret = bch2_truncate(c, inode_inum(inode), iattr->ia_size, &i_sectors_delta);
/* Account sector-count delta even on error — the btree update may be partial */
bch2_i_sectors_acct(c, inode, NULL, i_sectors_delta);
if (unlikely(ret)) { /* * If we error here, VFS caches are now inconsistent with btree
*/
set_bit(EI_INODE_ERROR, &inode->ei_flags); goto err;
}
/*
 * Sanity check: a file truncated to size 0 should not still have blocks
 * accounted against it (unless the journal already went bad).
 */
if (unlikely(!inode->v.i_size && inode->v.i_blocks &&
!bch2_journal_error(&c->journal))) { struct printbuf buf = PRINTBUF;
bch2_log_msg_start(c, &buf);
prt_printf(&buf, "inode %lu truncated to 0 but i_blocks %llu (ondisk %lli)",
inode->v.i_ino, (u64) inode->v.i_blocks,
inode->ei_inode.bi_sectors);
/*
 * NOTE(review): fragment — tail of the fallocate dispatch path; inode, mode,
 * offset, len, block_start/block_end and end come from outside this view.
 * The "elseif" tokens below are concatenation artifacts for "else if".
 */
ret = __bchfs_fallocate(inode, mode, block_start >> 9, block_end >> 9);
/* * On -ENOSPC in ZERO_RANGE mode, we still want to do the inode update, * so that the VFS cache i_size is consistent with the btree i_size:
*/ if (ret &&
!(bch2_err_matches(ret, ENOSPC) && (mode & FALLOC_FL_ZERO_RANGE))) return ret;
if (mode & FALLOC_FL_KEEP_SIZE && end > inode->v.i_size)
end = inode->v.i_size;
/* Dispatch on the FALLOC_FL_* mode combination; reject anything else */
if (!(mode & ~(FALLOC_FL_KEEP_SIZE|FALLOC_FL_ZERO_RANGE)))
ret = bchfs_fallocate(inode, mode, offset, len); elseif (mode == (FALLOC_FL_PUNCH_HOLE|FALLOC_FL_KEEP_SIZE))
ret = bchfs_fpunch(inode, offset, len); elseif (mode == FALLOC_FL_INSERT_RANGE)
ret = bchfs_fcollapse_finsert(inode, offset, len, true); elseif (mode == FALLOC_FL_COLLAPSE_RANGE)
ret = bchfs_fcollapse_finsert(inode, offset, len, false); else
ret = -EOPNOTSUPP;
err:
/* Unwind in reverse acquisition order: pagecache block, i_rwsem, write ref */
bch2_pagecache_block_put(inode);
inode_unlock(&inode->v);
enumerated_ref_put(&c->writes, BCH_WRITE_REF_fallocate);
return bch2_err_class(ret);
}
/*
 * Take a quota reservation for unallocated blocks in a given file range.
 * Does not check pagecache.
 *
 * Walks the extents btree over [start, end) (sector offsets within the
 * inode) and subtracts every already-allocated sector from the range's
 * length, so quota is reserved only for the unallocated remainder.
 *
 * Returns 0 on success or a negative error from the btree walk /
 * bch2_quota_reservation_add().
 *
 * Fix vs. original: "staticint" was a concatenation artifact (missing
 * space) that would not compile; statements re-split onto separate lines.
 */
static int quota_reserve_range(struct bch_inode_info *inode,
			       struct quota_res *res,
			       u64 start, u64 end)
{
	struct bch_fs *c = inode->v.i_sb->s_fs_info;
	u64 sectors = end - start;

	int ret = bch2_trans_run(c,
		for_each_btree_key_in_subvolume_max(trans, iter,
				BTREE_ID_extents,
				POS(inode->v.i_ino, start),
				POS(inode->v.i_ino, end - 1),
				inode->ei_inum.subvol, 0, k, ({
			if (bkey_extent_is_allocation(k.k)) {
				/* Sectors of this extent overlapping [start, end) */
				u64 s = min(end, k.k->p.offset) -
					max(start, bkey_start_offset(k.k));
				BUG_ON(s > sectors);
				sectors -= s;
			}
			0;
		})));

	return ret ?: bch2_quota_reservation_add(c, inode, res, sectors, true);
}
/* * XXX: we'd like to be telling bch2_remap_range() if we have * permission to write to the source file, and thus if io path option * changes should be propagated through the copy, but we need mnt_idmap * from the pathwalk, awkward
*/
/*
 * NOTE(review): fragment — interior of the remap_file_range path; c, dst,
 * src, pos_dst, pos_src, aligned_len, len and i_sectors_delta come from
 * outside this view. Offsets are converted from bytes to 512-byte sectors.
 */
ret = bch2_remap_range(c,
inode_inum(dst), pos_dst >> 9,
inode_inum(src), pos_src >> 9,
aligned_len >> 9,
pos_dst + len, &i_sectors_delta, false); if (ret < 0) goto err;
/* * due to alignment, we might have remapped slightly more than requested
*/
ret = min((u64) ret << 9, (u64) len);
/* * Found a hole in the btree, now make sure it's * a hole in the pagecache. We might have to * keep searching if this hole is entirely dirty * in the page cache:
*/
/*
 * NOTE(review): fragment — interior of the SEEK_HOLE btree walk followed by
 * the tail of the llseek dispatcher; trans, inode, start_offset, end_offset,
 * next_hole, offset, k, file and whence come from outside this view.
 */
bch2_trans_unlock(trans);
loff_t pagecache_hole = bch2_seek_pagecache_hole(&inode->v,
start_offset, end_offset, 0, false); if (pagecache_hole < end_offset) {
next_hole = pagecache_hole; break;
}
} else {
/* Extent covers this position: resume searching past its start */
offset = max(offset, bkey_start_offset(k.k) << 9);
}
0;
}))); if (ret) return ret;
/* llseek dispatch: standard whences go to the VFS, data/hole are ours */
switch (whence) { case SEEK_SET: case SEEK_CUR: case SEEK_END:
ret = generic_file_llseek(file, offset, whence); break; case SEEK_DATA:
ret = bch2_seek_data(file, offset); break; case SEEK_HOLE:
ret = bch2_seek_hole(file, offset); break; default:
ret = -EINVAL; break;
}
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit noch Richtigkeit
noch Qualität der bereitgestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.