if (rbio->bio.bi_iter.bi_size == bytes)
flags |= BCH_READ_last_fragment;
bch2_bio_page_state_set(&rbio->bio, k);
bch2_read_extent(trans, rbio, iter.pos,
data_btree, k, offset_into_extent, flags); /* * Careful there's a landmine here if bch2_read_extent() ever * starts returning transaction restarts here. * * We've changed rbio->bi_iter.bi_size to be "bytes we can read * from this extent" with the swap call, and we restore it * below. That restore needs to come before checking for * errors. * * But unlike __bch2_read(), we use the rbio bvec iter, not one * on the stack, so we can't do the restore right after the * bch2_read_extent() call: we don't own that iterator anymore * if BCH_READ_last_fragment is set, since we may have submitted * that rbio instead of cloning it.
*/
int ret = readpages_iter_init(&readpages_iter, ractl); if (ret) return;
/* * Besides being a general performance optimization, plugging helps with * avoiding btree transaction srcu warnings - submitting a bio can * block, and we don't want todo that with the transaction locked. * * However, plugged bios are submitted when we schedule; we ideally * would have our own scheduler hook to call unlock_long() before * scheduling.
*/
blk_start_plug(&plug);
bch2_pagecache_add_get(inode);
struct btree_trans *trans = bch2_trans_get(c); while ((folio = readpage_iter_peek(&readpages_iter))) { unsigned n = min_t(unsigned,
readpages_iter.folios.nr -
readpages_iter.idx,
BIO_MAX_VECS); struct bch_read_bio *rbio =
rbio_init(bio_alloc_bioset(NULL, n, REQ_OP_READ,
GFP_KERNEL, &c->bio_read),
c,
opts,
bch2_readpages_end_io);
/* * Determine when a writepage io is full. We have to limit writepage bios to a * single page per bvec (i.e. 1MB with 4k pages) because that is the limit to * what the bounce path in bch2_write_extent() can handle. In theory we could * loosen this restriction for non-bounce I/O, but we don't have that context * here. Ideally, we can up this limit and make it configurable in the future * when the bounce path can be enhanced to accommodate larger source bios.
*/ staticinlinebool bch_io_full(struct bch_writepage_io *io, unsigned len)
{ struct bio *bio = &io->op.wbio.bio; return bio_full(bio, len) ||
(bio->bi_iter.bi_size + len > BIO_MAX_VECS * PAGE_SIZE);
}
s = __bch2_folio(fi.folio);
spin_lock(&s->lock); for (i = 0; i < folio_sectors(fi.folio); i++)
s->s[i].nr_replicas = 0;
spin_unlock(&s->lock);
}
}
/* * racing with fallocate can cause us to add fewer sectors than * expected - but we shouldn't add more sectors than expected:
*/
WARN_ON_ONCE(io->op.i_sectors_delta > 0);
/* * (error (due to going RO) halfway through a page can screw that up * slightly) * XXX wtf? BUG_ON(io->op.op.i_sectors_delta >= PAGE_SECTORS);
*/
/* * The writeback flag is effectively our ref on the inode - * fixup i_blocks before calling folio_end_writeback:
*/
bch2_i_sectors_acct(c, io->inode, NULL, io->op.i_sectors_delta);
/* * Get a bch_writepage_io and add @page to it - appending to an existing one if * possible, else allocating a new one:
*/ staticvoid bch2_writepage_io_alloc(struct bch_fs *c, struct writeback_control *wbc, struct bch_writepage_state *w, struct bch_inode_info *inode,
u64 sector, unsigned nr_replicas)
{ struct bch_write_op *op;
/* Is the folio fully inside i_size? */ if (folio_end_pos(folio) <= i_size) goto do_io;
/* Is the folio fully outside i_size? (truncate in progress) */ if (folio_pos(folio) >= i_size) {
folio_unlock(folio); return 0;
}
/* * The folio straddles i_size. It must be zeroed out on each and every * writepage invocation because it may be mmapped. "A file is mapped * in multiples of the folio size. For a file that is not a multiple of * the folio size, the remaining memory is zeroed when mapped, and * writes to that region are not written out to the file."
*/
folio_zero_segment(folio,
i_size - folio_pos(folio),
folio_size(folio));
do_io:
f_sectors = folio_sectors(folio);
s = bch2_folio(folio);
/* If we're writing entire folio, don't need to read it in first: */ if (!offset && len == folio_size(folio)) goto out;
if (!offset && pos + len >= inode->v.i_size) {
folio_zero_segment(folio, len, folio_size(folio));
flush_dcache_folio(folio); goto out;
}
if (folio_pos(folio) >= inode->v.i_size) {
folio_zero_segments(folio, 0, offset, offset + len, folio_size(folio));
flush_dcache_folio(folio); goto out;
}
readpage:
ret = bch2_read_single_folio(folio, mapping); if (ret) goto err;
out:
ret = bch2_folio_set(c, inode_inum(inode), &folio, 1); if (ret) goto err;
ret = bch2_folio_reservation_get(c, inode, folio, res, offset, len); if (ret) { if (!folio_test_uptodate(folio)) { /* * If the folio hasn't been read in, we won't know if we * actually need a reservation - we don't actually need * to read here, we just need to check if the folio is * fully backed by uncompressed data:
*/ goto readpage;
}
if (unlikely(copied < len && !folio_test_uptodate(folio))) { /* * The folio needs to be read in, but that would destroy * our partial write - simplest thing is to just force * userspace to redo the write:
*/
folio_zero_range(folio, 0, folio_size(folio));
flush_dcache_folio(folio);
copied = 0;
}
/* * If the last folio added to the mapping starts beyond current EOF, we * performed a short write but left around at least one post-EOF folio. * Clean up the mapping before we return.
*/ if (last_folio_pos >= inode->v.i_size)
truncate_pagecache(&inode->v, inode->v.i_size);
do { unsigned offset = pos & (PAGE_SIZE - 1); unsigned bytes = iov_iter_count(iter);
again: /* * Bring in the user page that we will copy from _first_. * Otherwise there's a nasty deadlock on copying from the * same page as we're writing to, without it being marked * up-to-date. * * Not only is this an optimisation, but it is also required * to check that the address is actually valid, when atomic * usercopies are used, below.
*/ if (unlikely(fault_in_iov_iter_readable(iter, bytes))) {
bytes = min_t(unsignedlong, iov_iter_count(iter),
PAGE_SIZE - offset);
if (unlikely(fault_in_iov_iter_readable(iter, bytes))) {
ret = -EFAULT; break;
}
}
if (unlikely(fatal_signal_pending(current))) {
ret = -EINTR; break;
}
ret = __bch2_buffered_write(inode, mapping, iter, pos, bytes); if (unlikely(ret < 0)) break;
cond_resched();
if (unlikely(ret == 0)) { /* * If we were unable to copy any data at all, we must * fall back to a single segment length write. * * If we didn't fallback here, we could livelock * because not all segments in the iov can be copied at * once without a pagefault.
*/
bytes = min_t(unsignedlong, PAGE_SIZE - offset,
iov_iter_single_seg_count(iter)); goto again;
}
pos += ret;
written += ret;
ret = 0;
balance_dirty_pages_ratelimited(mapping);
} while (iov_iter_count(iter));
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder die Vollständigkeit noch die Richtigkeit
noch die Qualität der bereitgestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.