/* * If the ecc fails, we return the error but otherwise * leave the filesystem running. We know any error is * local to this block.
*/
rc = ocfs2_validate_meta_ecc(sb, bh->b_data, &rb->rf_check); if (rc) {
mlog(ML_ERROR, "Checksum failed for refcount block %llu\n",
(unsignedlonglong)bh->b_blocknr); return rc;
}
if (!OCFS2_IS_VALID_REFCOUNT_BLOCK(rb)) {
rc = ocfs2_error(sb, "Refcount block #%llu has bad signature %.*s\n",
(unsignedlonglong)bh->b_blocknr, 7,
rb->rf_signature); goto out;
}
if (le64_to_cpu(rb->rf_blkno) != bh->b_blocknr) {
rc = ocfs2_error(sb, "Refcount block #%llu has an invalid rf_blkno of %llu\n",
(unsignedlonglong)bh->b_blocknr,
(unsignedlonglong)le64_to_cpu(rb->rf_blkno)); goto out;
}
if (le32_to_cpu(rb->rf_fs_generation) != OCFS2_SB(sb)->fs_generation) {
rc = ocfs2_error(sb, "Refcount block #%llu has an invalid rf_fs_generation of #%u\n",
(unsignedlonglong)bh->b_blocknr,
le32_to_cpu(rb->rf_fs_generation)); goto out;
}
out: return rc;
}
spin_lock(&osb->osb_lock); if (osb->osb_ref_tree_lru &&
osb->osb_ref_tree_lru->rf_blkno == rf_blkno)
tree = osb->osb_ref_tree_lru; else
tree = ocfs2_find_refcount_tree(osb, rf_blkno); if (tree) goto out;
spin_unlock(&osb->osb_lock);
new = ocfs2_allocate_refcount_tree(osb, rf_blkno); if (!new) {
ret = -ENOMEM;
mlog_errno(ret); return ret;
} /* * We need the generation to create the refcount tree lock and since * it isn't changed during the tree modification, we are safe here to * read without protection. * We also have to purge the cache after we create the lock since the * refcount block may have the stale data. It can only be trusted when * we hold the refcount lock.
*/
ret = ocfs2_read_refcount_block(&new->rf_ci, rf_blkno, &ref_root_bh); if (ret) {
mlog_errno(ret);
ocfs2_metadata_cache_exit(&new->rf_ci);
kfree(new); return ret;
}
/*
 * Take the two locks that guard a refcount tree: first the lock obtained
 * through ocfs2_refcount_lock(), then the tree's rf_sem.
 *
 * @osb:  ocfs2 super block info (not referenced by this helper).
 * @tree: refcount tree to lock.
 * @rw:   passed unchanged to ocfs2_refcount_lock(); non-zero takes rf_sem
 *        for writing, zero takes it for reading.
 *
 * Returns 0 with both locks held, or the error returned by
 * ocfs2_refcount_lock(), in which case rf_sem is not taken.
 */
static int __ocfs2_lock_refcount_tree(struct ocfs2_super *osb,
				      struct ocfs2_refcount_tree *tree, int rw)
{
	int ret;

	ret = ocfs2_refcount_lock(tree, rw);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	if (rw)
		down_write(&tree->rf_sem);
	else
		down_read(&tree->rf_sem);

out:
	return ret;
}
/* * Lock the refcount tree pointed to by ref_blkno and return the tree. * In most cases, we lock the tree and read the refcount block. * So read it here if the caller really needs it. * * If the tree has been re-created by another node, it will free the * old one and re-create it.
*/ int ocfs2_lock_refcount_tree(struct ocfs2_super *osb,
u64 ref_blkno, int rw, struct ocfs2_refcount_tree **ret_tree, struct buffer_head **ref_bh)
{ int ret, delete_tree = 0; struct ocfs2_refcount_tree *tree = NULL; struct buffer_head *ref_root_bh = NULL; struct ocfs2_refcount_block *rb;
again:
ret = ocfs2_get_refcount_tree(osb, ref_blkno, &tree); if (ret) {
mlog_errno(ret); return ret;
}
ocfs2_refcount_tree_get(tree);
ret = __ocfs2_lock_refcount_tree(osb, tree, rw); if (ret) {
mlog_errno(ret);
ocfs2_refcount_tree_put(tree); goto out;
}
ret = ocfs2_read_refcount_block(&tree->rf_ci, tree->rf_blkno,
&ref_root_bh); if (ret) {
mlog_errno(ret);
ocfs2_unlock_refcount_tree(osb, tree, rw); goto out;
}
rb = (struct ocfs2_refcount_block *)ref_root_bh->b_data; /* * If the refcount block has been freed and re-created, we may need * to recreate the refcount tree also. * * Here we just remove the tree from the rb-tree, and the last * kref holder will unlock and delete this refcount_tree. * Then we goto "again" and ocfs2_get_refcount_tree will create * the new refcount tree for us.
*/ if (tree->rf_generation != le32_to_cpu(rb->rf_generation)) { if (!tree->rf_removed) {
ocfs2_erase_refcount_tree_from_list(osb, tree);
tree->rf_removed = 1;
delete_tree = 1;
}
ocfs2_unlock_refcount_tree(osb, tree, rw); /* * We get an extra reference when we create the refcount * tree, so another put will destroy it.
*/ if (delete_tree)
ocfs2_refcount_tree_put(tree);
brelse(ref_root_bh);
ref_root_bh = NULL; goto again;
}
/* * We have to init the tree lock here since it will use * the generation number to create it.
*/
new_tree->rf_generation = le32_to_cpu(rb->rf_generation);
ocfs2_init_refcount_tree_lock(osb, new_tree, first_blkno,
new_tree->rf_generation);
spin_lock(&osb->osb_lock);
tree = ocfs2_find_refcount_tree(osb, first_blkno);
/* * We've just created a new refcount tree in this block. If * we found a refcount tree on the ocfs2_super, it must be * one we just deleted. We free the old tree before * inserting the new tree.
*/
BUG_ON(tree && tree->rf_generation == new_tree->rf_generation); if (tree)
ocfs2_erase_refcount_tree_from_list_no_lock(osb, tree);
ocfs2_insert_refcount_tree(osb, new_tree);
spin_unlock(&osb->osb_lock);
new_tree = NULL; if (tree)
ocfs2_refcount_tree_put(tree);
out_commit:
ocfs2_commit_trans(osb, handle);
out: if (new_tree) {
ocfs2_metadata_cache_exit(&new_tree->rf_ci);
kfree(new_tree);
}
brelse(new_bh); if (meta_ac)
ocfs2_free_alloc_context(meta_ac);
/* * If we are the last user, we need to free the block. * So lock the allocator ahead.
*/ if (le32_to_cpu(rb->rf_count) == 1) {
blk = le64_to_cpu(rb->rf_blkno);
bit = le16_to_cpu(rb->rf_suballoc_bit); if (rb->rf_suballoc_loc)
bg_blkno = le64_to_cpu(rb->rf_suballoc_loc); else
bg_blkno = ocfs2_which_suballoc_group(blk, bit);
alloc_inode = ocfs2_get_system_file_inode(osb,
EXTENT_ALLOC_SYSTEM_INODE,
le16_to_cpu(rb->rf_suballoc_slot)); if (!alloc_inode) {
ret = -ENOMEM;
mlog_errno(ret); goto out;
}
inode_lock(alloc_inode);
ret = ocfs2_inode_lock(alloc_inode, &alloc_bh, 1); if (ret) {
mlog_errno(ret); goto out_mutex;
}
credits += OCFS2_SUBALLOC_FREE;
}
handle = ocfs2_start_trans(osb, credits); if (IS_ERR(handle)) {
ret = PTR_ERR(handle);
mlog_errno(ret); goto out_unlock;
}
ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh,
OCFS2_JOURNAL_ACCESS_WRITE); if (ret) {
mlog_errno(ret); goto out_commit;
}
ret = ocfs2_journal_access_rb(handle, &ref_tree->rf_ci, blk_bh,
OCFS2_JOURNAL_ACCESS_WRITE); if (ret) {
mlog_errno(ret); goto out_commit;
}
/* ok, cpos fail in this rec. Just return. */ if (ret_rec)
*ret_rec = *rec; goto out;
}
if (ret_rec) { /* We meet with a hole here, so fake the rec. */
ret_rec->r_cpos = cpu_to_le64(cpos);
ret_rec->r_refcount = 0; if (i < le16_to_cpu(rb->rf_records.rl_used) &&
le64_to_cpu(rec->r_cpos) < cpos + len)
ret_rec->r_clusters =
cpu_to_le32(le64_to_cpu(rec->r_cpos) - cpos); else
ret_rec->r_clusters = cpu_to_le32(len);
}
out:
*index = i;
}
/*
 * Try to remove refcount tree. The mechanism is:
 * 1) Check whether i_clusters == 0, if no, exit.
 * 2) check whether we have i_xattr_loc in dinode. if yes, exit.
 * 3) Check whether we have inline xattr stored outside, if yes, exit.
 * 4) Remove the tree.
 *
 * @inode: inode whose refcount tree may be dropped.
 * @di_bh: buffer head holding the inode's on-disk dinode.
 *
 * ip_xattr_sem and ip_alloc_sem are both held for writing across the
 * checks and the removal, and are released in the reverse order of
 * acquisition.
 *
 * This is a best-effort operation: a failure of
 * ocfs2_remove_refcount_tree() is only logged and the function always
 * returns 0.
 */
int ocfs2_try_remove_refcount_tree(struct inode *inode,
				   struct buffer_head *di_bh)
{
	int ret;
	struct ocfs2_inode_info *oi = OCFS2_I(inode);
	struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;

	/* Pair with the up_write() calls at "out". */
	down_write(&oi->ip_xattr_sem);
	down_write(&oi->ip_alloc_sem);

	/* Step 1: the inode still owns clusters, keep the tree. */
	if (oi->ip_clusters)
		goto out;

	/* Step 2: an external xattr block is attached to the dinode. */
	if ((oi->ip_dyn_features & OCFS2_HAS_XATTR_FL) && di->i_xattr_loc)
		goto out;

	/* Step 3: an inline xattr keeps its value outside the inode. */
	if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL &&
	    ocfs2_has_inline_xattr_value_outside(inode, di))
		goto out;

	/* Step 4: nothing can reference the tree any more, drop it. */
	ret = ocfs2_remove_refcount_tree(inode, di_bh);
	if (ret)
		mlog_errno(ret);

out:
	up_write(&oi->ip_alloc_sem);
	up_write(&oi->ip_xattr_sem);
	return 0;
}
/* * Find the end range for a leaf refcount block indicated by * el->l_recs[index].e_blkno.
*/ staticint ocfs2_get_refcount_cpos_end(struct ocfs2_caching_info *ci, struct buffer_head *ref_root_bh, struct ocfs2_extent_block *eb, struct ocfs2_extent_list *el, int index, u32 *cpos_end)
{ int ret, i, subtree_root;
u32 cpos;
u64 blkno; struct super_block *sb = ocfs2_metadata_cache_get_super(ci); struct ocfs2_path *left_path = NULL, *right_path = NULL; struct ocfs2_extent_tree et; struct ocfs2_extent_list *tmp_el;
if (index < le16_to_cpu(el->l_next_free_rec) - 1) { /* * We have a extent rec after index, so just use the e_cpos * of the next extent rec.
*/
*cpos_end = le32_to_cpu(el->l_recs[index+1].e_cpos); return 0;
}
if (!eb || !eb->h_next_leaf_blk) { /* * We are the last extent rec, so any high cpos should * be stored in this leaf refcount block.
*/
*cpos_end = UINT_MAX; return 0;
}
/* * If the extent block isn't the last one, we have to find * the subtree root between this extent block and the next * leaf extent block and get the corresponding e_cpos from * the subroot. Otherwise we may corrupt the b-tree.
*/
ocfs2_init_refcount_extent_tree(&et, ci, ref_root_bh);
left_path = ocfs2_new_path_from_et(&et); if (!left_path) {
ret = -ENOMEM;
mlog_errno(ret); goto out;
}
cpos = le32_to_cpu(eb->h_list.l_recs[index].e_cpos);
ret = ocfs2_find_path(ci, left_path, cpos); if (ret) {
mlog_errno(ret); goto out;
}
right_path = ocfs2_new_path_from_path(left_path); if (!right_path) {
ret = -ENOMEM;
mlog_errno(ret); goto out;
}
ret = ocfs2_find_cpos_for_right_leaf(sb, left_path, &cpos); if (ret) {
mlog_errno(ret); goto out;
}
ret = ocfs2_find_path(ci, right_path, cpos); if (ret) {
mlog_errno(ret); goto out;
}
/* * Given a cpos and len, try to find the refcount record which contains cpos. * 1. If cpos can be found in one refcount record, return the record. * 2. If cpos can't be found, return a fake record which start from cpos * and end at a small value between cpos+len and start of the next record. * This fake record has r_refcount = 0.
*/ staticint ocfs2_get_refcount_rec(struct ocfs2_caching_info *ci, struct buffer_head *ref_root_bh,
u64 cpos, unsignedint len, struct ocfs2_refcount_rec *ret_rec, int *index, struct buffer_head **ret_bh)
{ int ret = 0, i, found;
u32 low_cpos, cpos_end; struct ocfs2_extent_list *el; struct ocfs2_extent_rec *rec = NULL; struct ocfs2_extent_block *eb = NULL; struct buffer_head *eb_bh = NULL, *ref_leaf_bh = NULL; struct super_block *sb = ocfs2_metadata_cache_get_super(ci); struct ocfs2_refcount_block *rb =
(struct ocfs2_refcount_block *)ref_root_bh->b_data;
el = &rb->rf_list;
low_cpos = cpos & OCFS2_32BIT_POS_MASK;
if (el->l_tree_depth) {
ret = ocfs2_find_leaf(ci, el, low_cpos, &eb_bh); if (ret) {
mlog_errno(ret); goto out;
}
eb = (struct ocfs2_extent_block *) eb_bh->b_data;
el = &eb->h_list;
if (el->l_tree_depth) {
ret = ocfs2_error(sb, "refcount tree %llu has non zero tree depth in leaf btree tree block %llu\n",
(unsignedlonglong)ocfs2_metadata_cache_owner(ci),
(unsignedlonglong)eb_bh->b_blocknr); goto out;
}
}
found = 0; for (i = le16_to_cpu(el->l_next_free_rec) - 1; i >= 0; i--) {
rec = &el->l_recs[i];
if (le32_to_cpu(rec->e_cpos) <= low_cpos) {
found = 1; break;
}
}
if (found) {
ret = ocfs2_get_refcount_cpos_end(ci, ref_root_bh,
eb, el, i, &cpos_end); if (ret) {
mlog_errno(ret); goto out;
}
if (cpos_end < low_cpos + len)
len = cpos_end - low_cpos;
}
ret = ocfs2_read_refcount_block(ci, le64_to_cpu(rec->e_blkno),
&ref_leaf_bh); if (ret) {
mlog_errno(ret); goto out;
}
/* * Merge the refcount rec if we are contiguous with the adjacent recs.
*/ staticvoid ocfs2_refcount_rec_merge(struct ocfs2_refcount_block *rb, int index)
{ enum ocfs2_ref_rec_contig contig =
ocfs2_refcount_rec_contig(rb, index);
ret = ocfs2_journal_access_rb(handle, ci, ref_root_bh,
OCFS2_JOURNAL_ACCESS_WRITE); if (ret) {
mlog_errno(ret); goto out;
}
ret = ocfs2_claim_metadata(handle, meta_ac, 1, &suballoc_loc,
&suballoc_bit_start, &num_got,
&blkno); if (ret) {
mlog_errno(ret); goto out;
}
new_bh = sb_getblk(sb, blkno); if (new_bh == NULL) {
ret = -ENOMEM;
mlog_errno(ret); goto out;
}
ocfs2_set_new_buffer_uptodate(ci, new_bh);
ret = ocfs2_journal_access_rb(handle, ci, new_bh,
OCFS2_JOURNAL_ACCESS_CREATE); if (ret) {
mlog_errno(ret); goto out;
}
/* * Initialize ocfs2_refcount_block. * It should contain the same information as the old root. * so just memcpy it and change the corresponding field.
*/
memcpy(new_bh->b_data, ref_root_bh->b_data, sb->s_blocksize);
if (l_cpos > r_cpos) return 1; if (l_cpos < r_cpos) return -1; return 0;
}
/* * The refcount cpos are ordered by their 64bit cpos, * But we will use the low 32 bit to be the e_cpos in the b-tree. * So we need to make sure that this pos isn't intersected with others. * * Note: The refcount block is already sorted by their low 32 bit cpos, * So just try the middle pos first, and we will exit when we find * the good position.
*/ staticint ocfs2_find_refcount_split_pos(struct ocfs2_refcount_list *rl,
u32 *split_pos, int *split_index)
{ int num_used = le16_to_cpu(rl->rl_used); int delta, middle = num_used / 2;
/* * XXX: Improvement later. * If we know all the high 32 bit cpos is the same, no need to sort. * * In order to make the whole process safe, we do: * 1. sort the entries by their low 32 bit cpos first so that we can * find the split cpos easily. * 2. call ocfs2_insert_extent to insert the new refcount block. * 3. move the refcount rec to the new block. * 4. sort the entries by their 64 bit cpos. * 5. dirty the new_rb and rb.
*/
sort(&rl->rl_recs, le16_to_cpu(rl->rl_used), sizeof(struct ocfs2_refcount_rec),
cmp_refcount_rec_by_low_cpos, NULL);
ret = ocfs2_find_refcount_split_pos(rl, &cpos, &split_index); if (ret) {
mlog_errno(ret); return ret;
}
new_rb->rf_cpos = cpu_to_le32(cpos);
/* move refcount records starting from split_index to the new block. */
num_moved = le16_to_cpu(rl->rl_used) - split_index;
memcpy(new_rl->rl_recs, &rl->rl_recs[split_index],
num_moved * sizeof(struct ocfs2_refcount_rec));
/*ok, remove the entries we just moved over to the other block. */
memset(&rl->rl_recs[split_index], 0,
num_moved * sizeof(struct ocfs2_refcount_rec));
/* change old and new rl_used accordingly. */
le16_add_cpu(&rl->rl_used, -num_moved);
new_rl->rl_used = cpu_to_le16(num_moved);
/* Insert the new leaf block with the specific offset cpos. */
ret = ocfs2_insert_extent(handle, &ref_et, new_cpos, new_bh->b_blocknr,
1, 0, meta_ac); if (ret)
mlog_errno(ret);
if (ref_root_bh == ref_leaf_bh) { /* * the old root bh hasn't been expanded to a b-tree, * so expand it first.
*/
ret = ocfs2_expand_inline_ref_root(handle, ci, ref_root_bh,
&expand_bh, meta_ac); if (ret) {
mlog_errno(ret); goto out;
}
} else {
expand_bh = ref_leaf_bh;
get_bh(expand_bh);
}
/* Now add a new refcount block into the tree.*/
ret = ocfs2_new_leaf_refcount_block(handle, ci, ref_root_bh,
expand_bh, meta_ac); if (ret)
mlog_errno(ret);
out:
brelse(expand_bh); return ret;
}
/* * Adjust the extent rec in b-tree representing ref_leaf_bh. * * Only called when we have inserted a new refcount rec at index 0 * which means ocfs2_extent_rec.e_cpos may need some change.
*/ staticint ocfs2_adjust_refcount_rec(handle_t *handle, struct ocfs2_caching_info *ci, struct buffer_head *ref_root_bh, struct buffer_head *ref_leaf_bh, struct ocfs2_refcount_rec *rec)
{ int ret = 0, i;
u32 new_cpos, old_cpos; struct ocfs2_path *path = NULL; struct ocfs2_extent_tree et; struct ocfs2_refcount_block *rb =
(struct ocfs2_refcount_block *)ref_root_bh->b_data; struct ocfs2_extent_list *el;
if (!(le32_to_cpu(rb->rf_flags) & OCFS2_REFCOUNT_TREE_FL)) goto out;
path = ocfs2_new_path_from_et(&et); if (!path) {
ret = -ENOMEM;
mlog_errno(ret); goto out;
}
ret = ocfs2_find_path(ci, path, old_cpos); if (ret) {
mlog_errno(ret); goto out;
}
/* * 2 more credits, one for the leaf refcount block, one for * the extent block contains the extent rec.
*/
ret = ocfs2_extend_trans(handle, 2); if (ret < 0) {
mlog_errno(ret); goto out;
}
ret = ocfs2_journal_access_rb(handle, ci, ref_leaf_bh,
OCFS2_JOURNAL_ACCESS_WRITE); if (ret < 0) {
mlog_errno(ret); goto out;
}
ret = ocfs2_journal_access_eb(handle, ci, path_leaf_bh(path),
OCFS2_JOURNAL_ACCESS_WRITE); if (ret < 0) {
mlog_errno(ret); goto out;
}
/* change the leaf extent block first. */
el = path_leaf_el(path);
for (i = 0; i < le16_to_cpu(el->l_next_free_rec); i++) if (le32_to_cpu(el->l_recs[i].e_cpos) == old_cpos) break;
BUG_ON(i == le16_to_cpu(el->l_next_free_rec));
el->l_recs[i].e_cpos = cpu_to_le32(new_cpos);
/* change the r_cpos in the leaf block. */
rb->rf_cpos = cpu_to_le32(new_cpos);
if (index == 0) {
ret = ocfs2_adjust_refcount_rec(handle, ci,
ref_root_bh,
ref_leaf_bh, rec); if (ret)
mlog_errno(ret);
}
out:
brelse(new_bh); return ret;
}
/* * Split the refcount_rec indexed by "index" in ref_leaf_bh. * This is much simpler than our b-tree code. * split_rec is the new refcount rec we want to insert. * If split_rec->r_refcount > 0, we are changing the refcount (in case we * increase refcount or decrease a refcount to non-zero). * If split_rec->r_refcount == 0, we are punching a hole in current refcount * rec (in case we decrease a refcount to zero).
*/ staticint ocfs2_split_refcount_rec(handle_t *handle, struct ocfs2_caching_info *ci, struct buffer_head *ref_root_bh, struct buffer_head *ref_leaf_bh, struct ocfs2_refcount_rec *split_rec, int index, int merge, struct ocfs2_alloc_context *meta_ac, struct ocfs2_cached_dealloc_ctxt *dealloc)
{ int ret, recs_need;
u32 len; struct ocfs2_refcount_block *rb =
(struct ocfs2_refcount_block *)ref_leaf_bh->b_data; struct ocfs2_refcount_list *rf_list = &rb->rf_records; struct ocfs2_refcount_rec *orig_rec = &rf_list->rl_recs[index]; struct ocfs2_refcount_rec *tail_rec = NULL; struct buffer_head *new_bh = NULL;
/* * If we just need to split the header or tail clusters, * no more recs are needed, just split is OK. * Otherwise we at least need one new recs.
*/ if (!split_rec->r_refcount &&
(split_rec->r_cpos == orig_rec->r_cpos ||
le64_to_cpu(split_rec->r_cpos) +
le32_to_cpu(split_rec->r_clusters) ==
le64_to_cpu(orig_rec->r_cpos) + le32_to_cpu(orig_rec->r_clusters)))
recs_need = 0; else
recs_need = 1;
/* * We need one more rec if we split in the middle and the new rec have * some refcount in it.
*/ if (split_rec->r_refcount &&
(split_rec->r_cpos != orig_rec->r_cpos &&
le64_to_cpu(split_rec->r_cpos) +
le32_to_cpu(split_rec->r_clusters) !=
le64_to_cpu(orig_rec->r_cpos) + le32_to_cpu(orig_rec->r_clusters)))
recs_need++;
/* If the leaf block don't have enough record, expand it. */ if (le16_to_cpu(rf_list->rl_used) + recs_need >
le16_to_cpu(rf_list->rl_count)) { struct ocfs2_refcount_rec tmp_rec;
u64 cpos = le64_to_cpu(orig_rec->r_cpos);
len = le32_to_cpu(orig_rec->r_clusters);
ret = ocfs2_expand_refcount_tree(handle, ci, ref_root_bh,
ref_leaf_bh, meta_ac); if (ret) {
mlog_errno(ret); goto out;
}
/* * We have to re-get it since now cpos may be moved to * another leaf block.
*/
ret = ocfs2_get_refcount_rec(ci, ref_root_bh,
cpos, len, &tmp_rec, &index,
&new_bh); if (ret) {
mlog_errno(ret); goto out;
}
ret = ocfs2_journal_access_rb(handle, ci, ref_leaf_bh,
OCFS2_JOURNAL_ACCESS_WRITE); if (ret) {
mlog_errno(ret); goto out;
}
/* * We have calculated out how many new records we need and store * in recs_need, so spare enough space first by moving the records * after "index" to the end.
*/ if (index != le16_to_cpu(rf_list->rl_used) - 1)
memmove(&rf_list->rl_recs[index + 1 + recs_need],
&rf_list->rl_recs[index + 1],
(le16_to_cpu(rf_list->rl_used) - index - 1) * sizeof(struct ocfs2_refcount_rec));
len = (le64_to_cpu(orig_rec->r_cpos) +
le32_to_cpu(orig_rec->r_clusters)) -
(le64_to_cpu(split_rec->r_cpos) +
le32_to_cpu(split_rec->r_clusters));
/* * If we have "len", then we will split in the tail and move it * to the end of the space we have just spared.
*/ if (len) {
tail_rec = &rf_list->rl_recs[index + recs_need];
/* * If the split pos isn't the same as the original one, we need to * split in the head. * * Note: We have the chance that split_rec.r_refcount = 0, * recs_need = 0 and len > 0, which means we just cut the head from * the orig_rec and in that case we have done some modification in * orig_rec above, so the check for r_cpos is faked.
*/ if (split_rec->r_cpos != orig_rec->r_cpos && tail_rec != orig_rec) {
len = le64_to_cpu(split_rec->r_cpos) -
le64_to_cpu(orig_rec->r_cpos);
orig_rec->r_clusters = cpu_to_le32(len);
index++;
}
while (len) {
ret = ocfs2_get_refcount_rec(ci, ref_root_bh,
cpos, len, &rec, &index,
&ref_leaf_bh); if (ret) {
mlog_errno(ret); goto out;
}
set_len = le32_to_cpu(rec.r_clusters);
/* * Here we may meet with 3 situations: * * 1. If we find an already existing record, and the length * is the same, cool, we just need to increase the r_refcount * and it is OK. * 2. If we find a hole, just insert it with r_refcount = 1. * 3. If we are in the middle of one extent record, split * it.
*/ if (rec.r_refcount && le64_to_cpu(rec.r_cpos) == cpos &&
set_len <= len) {
trace_ocfs2_increase_refcount_change(
(unsignedlonglong)cpos, set_len,
le32_to_cpu(rec.r_refcount));
ret = ocfs2_change_refcount_rec(handle, ci,
ref_leaf_bh, index,
merge, 1); if (ret) {
mlog_errno(ret); goto out;
}
} elseif (!rec.r_refcount) {
rec.r_refcount = cpu_to_le32(1);
ocfs2_init_refcount_extent_tree(&et, ci, ref_root_bh);
ret = ocfs2_remove_extent(handle, &et, le32_to_cpu(rb->rf_cpos),
1, meta_ac, dealloc); if (ret) {
mlog_errno(ret); goto out;
}
ocfs2_remove_from_cache(ci, ref_leaf_bh);
/* * add the freed block to the dealloc so that it will be freed * when we run dealloc.
*/
ret = ocfs2_cache_block_dealloc(dealloc, EXTENT_ALLOC_SYSTEM_INODE,
le16_to_cpu(rb->rf_suballoc_slot),
le64_to_cpu(rb->rf_suballoc_loc),
le64_to_cpu(rb->rf_blkno),
le16_to_cpu(rb->rf_suballoc_bit)); if (ret) {
mlog_errno(ret); goto out;
}
ret = ocfs2_journal_access_rb(handle, ci, ref_root_bh,
OCFS2_JOURNAL_ACCESS_WRITE); if (ret) {
mlog_errno(ret); goto out;
}
/* * check whether we need to restore the root refcount block if * there is no leaf extent block at all.
*/ if (!rb->rf_list.l_next_free_rec) {
BUG_ON(rb->rf_clusters);
/* Remove the leaf refcount block if it contains no refcount record. */ if (!rb->rf_records.rl_used && ref_leaf_bh != ref_root_bh) {
ret = ocfs2_remove_refcount_extent(handle, ci, ref_root_bh,
ref_leaf_bh, meta_ac,
dealloc); if (ret)
mlog_errno(ret);
}
/*
 * Decrease the refcount of "len" clusters starting at "cpos" in the
 * refcount tree that belongs to "inode".
 *
 * Caller must hold refcount tree lock.
 *
 * @inode:   refcounted inode; BUG()s if ocfs2_is_refcount_inode() is false.
 * @handle:  running journal handle.
 * @cpos:    first cluster whose refcount is decreased.
 * @len:     number of clusters.
 * @meta_ac: metadata allocation context handed to the worker.
 * @dealloc: dealloc context handed to the worker.
 * @delete:  passed unchanged to __ocfs2_decrease_refcount().
 *
 * The root refcount block is looked up via ocfs2_get_refcount_block() and
 * ocfs2_get_refcount_tree(), read, and passed to
 * __ocfs2_decrease_refcount(). The buffer head is released on every exit
 * path.
 *
 * Returns 0 on success or the error of the first helper that failed.
 */
int ocfs2_decrease_refcount(struct inode *inode,
			    handle_t *handle, u32 cpos, u32 len,
			    struct ocfs2_alloc_context *meta_ac,
			    struct ocfs2_cached_dealloc_ctxt *dealloc,
			    int delete)
{
	int ret;
	u64 ref_blkno;
	struct buffer_head *ref_root_bh = NULL;
	struct ocfs2_refcount_tree *tree;

	BUG_ON(!ocfs2_is_refcount_inode(inode));

	ret = ocfs2_get_refcount_block(inode, &ref_blkno);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	ret = ocfs2_get_refcount_tree(OCFS2_SB(inode->i_sb), ref_blkno, &tree);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	ret = ocfs2_read_refcount_block(&tree->rf_ci, tree->rf_blkno,
					&ref_root_bh);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	ret = __ocfs2_decrease_refcount(handle, &tree->rf_ci, ref_root_bh,
					cpos, len, meta_ac, dealloc, delete);
	if (ret)
		mlog_errno(ret);

out:
	/* ref_root_bh is still NULL if we bailed out before the read. */
	brelse(ref_root_bh);
	return ret;
}
/*
 * Mark the already-existing extent at cpos as refcounted for len clusters.
 * This adds the refcount extent flag.
 *
 * If the existing extent is larger than the request, initiate a
 * split. An attempt will be made at merging with adjacent extents.
 *
 * The caller is responsible for passing down meta_ac if we'll need it.
 *
 * @inode:   inode owning the extent.
 * @et:      extent tree that contains the extent.
 * @handle:  running journal handle.
 * @cpos:    logical start cluster of the range.
 * @len:     number of clusters in the range.
 * @phys:    physical start of the range, as expected by
 *           ocfs2_change_extent_flag().
 * @meta_ac: metadata allocation context.
 * @dealloc: dealloc context.
 *
 * Returns the result of ocfs2_error() when the super block does not have
 * the refcount tree feature, otherwise the result of
 * ocfs2_change_extent_flag() (0 on success).
 */
static int ocfs2_mark_extent_refcounted(struct inode *inode,
					struct ocfs2_extent_tree *et,
					handle_t *handle, u32 cpos,
					u32 len, u32 phys,
					struct ocfs2_alloc_context *meta_ac,
					struct ocfs2_cached_dealloc_ctxt *dealloc)
{
	int ret;

	/* Refuse to touch refcount state on a volume without the feature. */
	if (!ocfs2_refcount_tree(OCFS2_SB(inode->i_sb))) {
		ret = ocfs2_error(inode->i_sb, "Inode %lu want to use refcount tree, but the feature bit is not set in the super block\n",
				  inode->i_ino);
		goto out;
	}

	/* Set OCFS2_EXT_REFCOUNTED on the range; no flags are cleared. */
	ret = ocfs2_change_extent_flag(handle, et, cpos,
				       len, phys, meta_ac, dealloc,
				       OCFS2_EXT_REFCOUNTED, 0);
	if (ret)
		mlog_errno(ret);

out:
	return ret;
}
/* * Given some contiguous physical clusters, calculate what we need * for modifying their refcount.
*/ staticint ocfs2_calc_refcount_meta_credits(struct super_block *sb, struct ocfs2_caching_info *ci, struct buffer_head *ref_root_bh,
u64 start_cpos,
u32 clusters, int *meta_add, int *credits)
{ int ret = 0, index, ref_blocks = 0, recs_add = 0;
u64 cpos = start_cpos; struct ocfs2_refcount_block *rb; struct ocfs2_refcount_rec rec; struct buffer_head *ref_leaf_bh = NULL, *prev_bh = NULL;
u32 len;
while (clusters) {
ret = ocfs2_get_refcount_rec(ci, ref_root_bh,
cpos, clusters, &rec,
&index, &ref_leaf_bh); if (ret) {
mlog_errno(ret); goto out;
}
if (ref_leaf_bh != prev_bh) { /* * Now we encounter a new leaf block, so calculate * whether we need to extend the old leaf.
*/ if (prev_bh) {
rb = (struct ocfs2_refcount_block *)
prev_bh->b_data;
if (le16_to_cpu(rb->rf_records.rl_used) +
recs_add >
le16_to_cpu(rb->rf_records.rl_count))
ref_blocks++;
}
len = min((u64)cpos + clusters, le64_to_cpu(rec.r_cpos) +
le32_to_cpu(rec.r_clusters)) - cpos; /* * We record all the records which will be inserted to the * same refcount block, so that we can tell exactly whether * we need a new refcount block or not. * * If we will insert a new one, this is easy and only happens * during adding refcounted flag to the extent, so we don't * have a chance of splitting. We just need one record. * * If the refcount rec already exists, that would be a little * complicated. we may have to: * 1) split at the beginning if the start pos isn't aligned. * we need 1 more record in this case. * 2) split int the end if the end pos isn't aligned. * we need 1 more record in this case. * 3) split in the middle because of file system fragmentation. * we need 2 more records in this case(we can't detect this * beforehand, so always think of the worst case).
*/ if (rec.r_refcount) {
recs_add += 2; /* Check whether we need a split at the beginning. */ if (cpos == start_cpos &&
cpos != le64_to_cpu(rec.r_cpos))
recs_add++;
/* Check whether we need a split in the end. */ if (cpos + clusters < le64_to_cpu(rec.r_cpos) +
le32_to_cpu(rec.r_clusters))
recs_add++;
} else
recs_add++;
if (prev_bh) {
rb = (struct ocfs2_refcount_block *)prev_bh->b_data;
if (le16_to_cpu(rb->rf_records.rl_used) + recs_add >
le16_to_cpu(rb->rf_records.rl_count))
ref_blocks++;
*credits += 1;
}
if (!ref_blocks) goto out;
*meta_add += ref_blocks;
*credits += ref_blocks;
/* * So we may need ref_blocks to insert into the tree. * That also means we need to change the b-tree and add that number * of records since we never merge them. * We need one more block for expansion since the new created leaf * block is also full and needs split.
*/
rb = (struct ocfs2_refcount_block *)ref_root_bh->b_data; if (le32_to_cpu(rb->rf_flags) & OCFS2_REFCOUNT_TREE_FL) { struct ocfs2_extent_tree et;
/* * For refcount tree, we will decrease some contiguous clusters * refcount count, so just go through it to see how many blocks * we gonna touch and whether we need to create new blocks. * * Normally the refcount blocks store these refcount should be * contiguous also, so that we can get the number easily. * We will at most add split 2 refcount records and 2 more * refcount blocks, so just check it in a rough way. * * Caller must hold refcount tree lock.
*/ int ocfs2_prepare_refcount_change_for_del(struct inode *inode,
u64 refcount_loc,
u64 phys_blkno,
u32 clusters, int *credits, int *ref_blocks)
{ int ret; struct buffer_head *ref_root_bh = NULL; struct ocfs2_refcount_tree *tree;
u64 start_cpos = ocfs2_blocks_to_clusters(inode->i_sb, phys_blkno);
if (!ocfs2_refcount_tree(OCFS2_SB(inode->i_sb))) {
ret = ocfs2_error(inode->i_sb, "Inode %lu want to use refcount tree, but the feature bit is not set in the super block\n",
inode->i_ino); goto out;
}
BUG_ON(!ocfs2_is_refcount_inode(inode));
ret = ocfs2_get_refcount_tree(OCFS2_SB(inode->i_sb),
refcount_loc, &tree); if (ret) {
mlog_errno(ret); goto out;
}
ret = ocfs2_read_refcount_block(&tree->rf_ci, refcount_loc,
&ref_root_bh); if (ret) {
mlog_errno(ret); goto out;
}
ret = ocfs2_calc_refcount_meta_credits(inode->i_sb,
&tree->rf_ci,
ref_root_bh,
start_cpos, clusters,
ref_blocks, credits); if (ret) {
mlog_errno(ret); goto out;
}
/* * Given an extent that starts at 'start' and an I/O that starts at 'cpos', * find an offset (start + (n * contig_clusters)) that is closest to cpos * while still being less than or equal to it. * * The goal is to break the extent at a multiple of contig_clusters.
*/ staticinlineunsignedint ocfs2_cow_align_start(struct super_block *sb, unsignedint start, unsignedint cpos)
{
BUG_ON(start > cpos);
/*
 * Given a cluster count of len, pad it out so that it is a multiple
 * of contig_clusters.
 *
 * @sb:  super block, used to look up the contig cluster count and mask.
 * @len: cluster count to round up.
 *
 * Returns len rounded up using ocfs2_cow_contig_clusters(sb) and
 * ocfs2_cow_contig_mask(sb). If the unsigned addition wraps, so that the
 * padded value ends up smaller than len, UINT_MAX is returned instead.
 */
static inline unsigned int ocfs2_cow_align_length(struct super_block *sb,
						  unsigned int len)
{
	unsigned int padded =
		(len + (ocfs2_cow_contig_clusters(sb) - 1)) &
		ocfs2_cow_contig_mask(sb);

	/* Did we wrap? */
	if (padded < len)
		padded = UINT_MAX;

	return padded;
}
/* * Calculate out the start and number of virtual clusters we need to CoW. * * cpos is virtual start cluster position we want to do CoW in a * file and write_len is the cluster length. * max_cpos is the place where we want to stop CoW intentionally. * * Normally we will start CoW from the beginning of extent record containing cpos. * We try to break up extents on boundaries of MAX_CONTIG_BYTES so that we * get good I/O from the resulting extent tree.
*/ staticint ocfs2_refcount_cal_cow_clusters(struct inode *inode, struct ocfs2_extent_list *el,
u32 cpos,
u32 write_len,
u32 max_cpos,
u32 *cow_start,
u32 *cow_len)
{ int ret = 0; int tree_height = le16_to_cpu(el->l_tree_depth), i; struct buffer_head *eb_bh = NULL; struct ocfs2_extent_block *eb = NULL; struct ocfs2_extent_rec *rec; unsignedint want_clusters, rec_end = 0; int contig_clusters = ocfs2_cow_contig_clusters(inode->i_sb); int leaf_clusters;
BUG_ON(cpos + write_len > max_cpos);
if (tree_height > 0) {
ret = ocfs2_find_leaf(INODE_CACHE(inode), el, cpos, &eb_bh); if (ret) {
mlog_errno(ret); goto out;
}
eb = (struct ocfs2_extent_block *) eb_bh->b_data;
el = &eb->h_list;
if (el->l_tree_depth) {
ret = ocfs2_error(inode->i_sb, "Inode %lu has non zero tree depth in leaf block %llu\n",
inode->i_ino,
(unsignedlonglong)eb_bh->b_blocknr); goto out;
}
}
*cow_len = 0; for (i = 0; i < le16_to_cpu(el->l_next_free_rec); i++) {
rec = &el->l_recs[i];
if (ocfs2_is_empty_extent(rec)) {
mlog_bug_on_msg(i != 0, "Inode %lu has empty record in " "index %d\n", inode->i_ino, i); continue;
}
if (le32_to_cpu(rec->e_cpos) +
le16_to_cpu(rec->e_leaf_clusters) <= cpos) continue;
if (*cow_len == 0) { /* * We should find a refcounted record in the * first pass.
*/
BUG_ON(!(rec->e_flags & OCFS2_EXT_REFCOUNTED));
*cow_start = le32_to_cpu(rec->e_cpos);
}
/* * If we encounter a hole, a non-refcounted record or * pass the max_cpos, stop the search.
*/ if ((!(rec->e_flags & OCFS2_EXT_REFCOUNTED)) ||
(*cow_len && rec_end != le32_to_cpu(rec->e_cpos)) ||
(max_cpos <= le32_to_cpu(rec->e_cpos))) break;
/* * How many clusters do we actually need from * this extent? First we see how many we actually * need to complete the write. If that's smaller * than contig_clusters, we try for contig_clusters.
*/ if (!*cow_len)
want_clusters = write_len; else
want_clusters = (cpos + write_len) -
(*cow_start + *cow_len); if (want_clusters < contig_clusters)
want_clusters = contig_clusters;
/* * If the write does not cover the whole extent, we * need to calculate how we're going to split the extent. * We try to do it on contig_clusters boundaries. * * Any extent smaller than contig_clusters will be * CoWed in its entirety.
*/ if (leaf_clusters <= contig_clusters)
*cow_len += leaf_clusters; elseif (*cow_len || (*cow_start == cpos)) { /* * This extent needs to be CoW'd from its * beginning, so all we have to do is compute * how many clusters to grab. We align * want_clusters to the edge of contig_clusters * to get better I/O.
*/
want_clusters = ocfs2_cow_align_length(inode->i_sb,
want_clusters);
if (leaf_clusters < want_clusters)
*cow_len += leaf_clusters; else
*cow_len += want_clusters;
} elseif ((*cow_start + contig_clusters) >=
(cpos + write_len)) { /* * Breaking off contig_clusters at the front * of the extent will cover our write. That's * easy.
*/
*cow_len = contig_clusters;
} elseif ((rec_end - cpos) <= contig_clusters) { /* * Breaking off contig_clusters at the tail of * this extent will cover cpos.
*/
*cow_start = rec_end - contig_clusters;
*cow_len = contig_clusters;
} elseif ((rec_end - cpos) <= want_clusters) { /* * While we can't fit the entire write in this * extent, we know that the write goes from cpos * to the end of the extent. Break that off. * We try to break it at some multiple of * contig_clusters from the front of the extent. * Failing that (ie, cpos is within * contig_clusters of the front), we'll CoW the * entire extent.
*/
*cow_start = ocfs2_cow_align_start(inode->i_sb,
*cow_start, cpos);
*cow_len = rec_end - *cow_start;
} else { /* * Ok, the entire write lives in the middle of * this extent. Let's try to slice the extent up * nicely. Optimally, our CoW region starts at * m*contig_clusters from the beginning of the * extent and goes for n*contig_clusters, * covering the entire write.
*/
*cow_start = ocfs2_cow_align_start(inode->i_sb,
*cow_start, cpos);
/* Have we covered our entire write yet? */ if ((*cow_start + *cow_len) >= (cpos + write_len)) break;
/* * If we reach the end of the extent block and don't get enough * clusters, continue with the next extent block if possible.
*/ if (i + 1 == le16_to_cpu(el->l_next_free_rec) &&
eb && eb->h_next_leaf_blk) {
brelse(eb_bh);
eb_bh = NULL;
ret = ocfs2_read_extent_block(INODE_CACHE(inode),
le64_to_cpu(eb->h_next_leaf_blk),
&eb_bh); if (ret) {
mlog_errno(ret); goto out;
}
eb = (struct ocfs2_extent_block *) eb_bh->b_data;
el = &eb->h_list;
i = -1;
}
}
out:
brelse(eb_bh); return ret;
}
/* * Prepare meta_ac, data_ac and calculate credits when we want to add some * num_clusters in data_tree "et" and change the refcount for the old * clusters (starting from p_cluster) in the refcount tree. * * Note: * 1. since we may split the old tree, so we at most will need num_clusters + 2 * more new leaf records. * 2. In some cases, we may not need to reserve new clusters (e.g., reflink), so * just give data_ac = NULL.
*/ staticint ocfs2_lock_refcount_allocators(struct super_block *sb,
u32 p_cluster, u32 num_clusters, struct ocfs2_extent_tree *et, struct ocfs2_caching_info *ref_ci, struct buffer_head *ref_root_bh, struct ocfs2_alloc_context **meta_ac, struct ocfs2_alloc_context **data_ac, int *credits)
{ int ret = 0, meta_add = 0; int num_free_extents = ocfs2_num_free_extents(et);
if (num_free_extents < 0) {
ret = num_free_extents;
mlog_errno(ret); goto out;
}
if (num_free_extents < num_clusters + 2)
meta_add =
ocfs2_extend_meta_needed(et->et_root_el);
offset = ((loff_t)cpos) << OCFS2_SB(sb)->s_clustersize_bits;
end = offset + (new_len << OCFS2_SB(sb)->s_clustersize_bits); /* * We only duplicate pages until we reach the page contains i_size - 1. * So trim 'end' to i_size.
*/ if (end > i_size_read(inode))
end = i_size_read(inode);
/* from, to is the offset within the page. */
from = offset & (PAGE_SIZE - 1);
to = PAGE_SIZE; if (map_end & (PAGE_SIZE - 1))
to = map_end & (PAGE_SIZE - 1);
retry:
folio = __filemap_get_folio(mapping, page_index,
FGP_LOCK | FGP_ACCESSED | FGP_CREAT, GFP_NOFS); if (IS_ERR(folio)) {
ret = PTR_ERR(folio);
mlog_errno(ret); break;
}
/* * In case PAGE_SIZE <= CLUSTER_SIZE, we do not expect a dirty * page, so write it back.
*/ if (PAGE_SIZE <= OCFS2_SB(sb)->s_clustersize) { if (folio_test_dirty(folio)) {
folio_unlock(folio);
folio_put(folio);
if (!folio_test_uptodate(folio)) {
ret = block_read_full_folio(folio, ocfs2_get_block); if (ret) {
mlog_errno(ret); goto unlock;
}
folio_lock(folio);
}
if (folio_buffers(folio)) {
ret = walk_page_buffers(handle, folio_buffers(folio),
from, to, &partial,
ocfs2_clear_cow_buffer); if (ret) {
mlog_errno(ret); goto unlock;
}
}
ocfs2_map_and_dirty_folio(inode, handle, from, to,
folio, 0, &new_block);
folio_mark_accessed(folio);
unlock:
folio_unlock(folio);
folio_put(folio);
offset = map_end; if (ret) break;
}
path = ocfs2_new_path_from_et(et); if (!path) {
ret = -ENOMEM;
mlog_errno(ret); goto out;
}
ret = ocfs2_find_path(et->et_ci, path, cpos); if (ret) {
mlog_errno(ret); goto out;
}
el = path_leaf_el(path);
index = ocfs2_search_extent_list(el, cpos); if (index == -1) {
ret = ocfs2_error(sb, "Inode %llu has an extent at cpos %u which can no longer be found\n",
(unsignedlonglong)ino, cpos); goto out;
}
ret = ocfs2_split_extent(handle, et, path, index,
&replace_rec, meta_ac, dealloc); if (ret)
mlog_errno(ret);
/*If the old clusters is unwritten, no need to duplicate. */ if (!(ext_flags & OCFS2_EXT_UNWRITTEN)) {
ret = context->cow_duplicate_clusters(handle, context->inode,
cpos, old, new, len); if (ret) {
mlog_errno(ret); goto out;
}
}
ret = ocfs2_clear_ext_refcount(handle, &context->data_et,
cpos, new, len, ext_flags,
context->meta_ac, &context->dealloc); if (ret)
mlog_errno(ret);
out: return ret;
}
int ocfs2_cow_sync_writeback(struct super_block *sb, struct inode *inode,
u32 cpos, u32 num_clusters)
{ int ret;
loff_t start, end;
/* * There are many different situation here. * 1. If refcount == 1, remove the flag and don't COW. * 2. If refcount > 1, allocate clusters. * Here we may not allocate r_len once at a time, so continue * until we reach num_clusters.
*/ if (le32_to_cpu(rec.r_refcount) == 1) { delete = 0;
ret = ocfs2_clear_ext_refcount(handle,
&context->data_et,
cpos, p_cluster,
set_len, e_flags,
context->meta_ac,
&context->dealloc); if (ret) {
mlog_errno(ret); goto out_commit;
}
} else { delete = 1;
ret = __ocfs2_claim_clusters(handle,
context->data_ac,
1, set_len,
&new_bit, &new_len); if (ret) {
mlog_errno(ret); goto out_commit;
}
ret = ocfs2_replace_clusters(handle, context,
cpos, p_cluster, new_bit,
new_len, e_flags); if (ret) {
mlog_errno(ret); goto out_commit;
}
set_len = new_len;
}
ret = __ocfs2_decrease_refcount(handle, ref_ci,
context->ref_root_bh,
p_cluster, set_len,
context->meta_ac,
&context->dealloc, delete); if (ret) {
mlog_errno(ret); goto out_commit;
}
/* handle any post_cow action. */ if (context->post_refcount && context->post_refcount->func) {
ret = context->post_refcount->func(context->inode, handle,
context->post_refcount->para); if (ret) {
mlog_errno(ret); goto out_commit;
}
}
/* * Here we should write the new page out first if we are * in write-back mode.
*/ if (context->get_clusters == ocfs2_di_get_clusters) {
ret = ocfs2_cow_sync_writeback(sb, context->inode, cpos,
orig_num_clusters); if (ret)
mlog_errno(ret);
}
out_commit:
ocfs2_commit_trans(osb, handle);
out: if (context->data_ac) {
ocfs2_free_alloc_context(context->data_ac);
context->data_ac = NULL;
} if (context->meta_ac) {
ocfs2_free_alloc_context(context->meta_ac);
context->meta_ac = NULL;
}
brelse(ref_leaf_bh);
if (!ocfs2_refcount_tree(osb)) { return ocfs2_error(inode->i_sb, "Inode %lu want to use refcount tree, but the feature bit is not set in the super block\n",
inode->i_ino);
}
ocfs2_init_dealloc_ctxt(&context->dealloc);
while (cow_len) {
ret = context->get_clusters(context, cow_start, &p_cluster,
&num_clusters, &ext_flags); if (ret) {
mlog_errno(ret); break;
}
BUG_ON(!(ext_flags & OCFS2_EXT_REFCOUNTED));
if (cow_len < num_clusters)
num_clusters = cow_len;
ret = ocfs2_make_clusters_writable(inode->i_sb, context,
cow_start, p_cluster,
num_clusters, ext_flags); if (ret) {
mlog_errno(ret); break;
}
ret = ocfs2_replace_cow(context); if (ret)
mlog_errno(ret);
/* * truncate the extent map here since no matter whether we meet with * any error during the action, we shouldn't trust cached extent map * any more.
*/
ocfs2_extent_map_trunc(inode, cow_start);
/* * CoW any and all clusters between cpos and cpos+write_len. * Don't CoW past max_cpos. If this returns successfully, all * clusters between cpos and cpos+write_len are safe to modify.
*/ int ocfs2_refcount_cow(struct inode *inode, struct buffer_head *di_bh,
u32 cpos, u32 write_len, u32 max_cpos)
{ int ret = 0;
u32 p_cluster, num_clusters; unsignedint ext_flags;
while (write_len) {
ret = ocfs2_get_clusters(inode, cpos, &p_cluster,
&num_clusters, &ext_flags); if (ret) {
mlog_errno(ret); break;
}
if (write_len < num_clusters)
num_clusters = write_len;
if (ext_flags & OCFS2_EXT_REFCOUNTED) {
ret = ocfs2_refcount_cow_hunk(inode, di_bh, cpos,
num_clusters, max_cpos); if (ret) {
mlog_errno(ret); break;
}
}
/* * Given a xattr value root, calculate the most meta/credits we need for * refcount tree change if we truncate it to 0.
*/ int ocfs2_refcounted_xattr_delete_need(struct inode *inode, struct ocfs2_caching_info *ref_ci, struct buffer_head *ref_root_bh, struct ocfs2_xattr_value_root *xv, int *meta_add, int *credits)
{ int ret = 0, index, ref_blocks = 0;
u32 p_cluster, num_clusters;
u32 cpos = 0, clusters = le32_to_cpu(xv->xr_clusters); struct ocfs2_refcount_block *rb; struct ocfs2_refcount_rec rec; struct buffer_head *ref_leaf_bh = NULL;
while (cpos < clusters) {
ret = ocfs2_xattr_get_clusters(inode, cpos, &p_cluster,
&num_clusters, &xv->xr_list,
NULL); if (ret) {
mlog_errno(ret); goto out;
}
cpos += num_clusters;
while (num_clusters) {
ret = ocfs2_get_refcount_rec(ref_ci, ref_root_bh,
p_cluster, num_clusters,
&rec, &index,
&ref_leaf_bh); if (ret) {
mlog_errno(ret); goto out;
}
/* * We really don't know whether the other clusters is in * this refcount block or not, so just take the worst * case that all the clusters are in this block and each * one will split a refcount rec, so totally we need * clusters * 2 new refcount rec.
*/ if (le16_to_cpu(rb->rf_records.rl_used) + clusters * 2 >
le16_to_cpu(rb->rf_records.rl_count))
ref_blocks++;
context->cow_duplicate_clusters = ocfs2_duplicate_clusters_by_jbd; /* We need the extra credits for duplicate_clusters by jbd. */
context->extra_credits =
ocfs2_clusters_to_blocks(inode->i_sb, 1) * cow_len;
context->get_clusters = ocfs2_xattr_value_get_clusters;
context->post_refcount = post;
if (!ret && ocfs2_dealloc_has_cluster(&dealloc)) {
ocfs2_schedule_truncate_log_flush(osb, 1);
ocfs2_run_deallocs(osb, &dealloc);
}
out: /* * Empty the extent map so that we may get the right extent * record from the disk.
*/
ocfs2_extent_map_trunc(inode, 0);
cpos = 0; while (cpos < clusters) {
ret = ocfs2_get_clusters(s_inode, cpos, &p_cluster,
&num_clusters, &ext_flags); if (ret) {
mlog_errno(ret); goto out;
} if (p_cluster) {
ret = ocfs2_add_refcounted_extent(t_inode, &et,
ref_ci, ref_root_bh,
cpos, p_cluster,
num_clusters,
ext_flags,
dealloc); if (ret) {
mlog_errno(ret); goto out;
}
}
cpos += num_clusters;
}
out: return ret;
}
/* * change the new file's attributes to the src. * * reflink creates a snapshot of a file, that means the attributes * must be identical except for three exceptions - nlink, ino, and ctime.
*/ staticint ocfs2_complete_reflink(struct inode *s_inode, struct buffer_head *s_bh, struct inode *t_inode, struct buffer_head *t_bh, bool preserve)
{ int ret;
handle_t *handle; struct ocfs2_dinode *s_di = (struct ocfs2_dinode *)s_bh->b_data; struct ocfs2_dinode *di = (struct ocfs2_dinode *)t_bh->b_data;
loff_t size = i_size_read(s_inode);
handle = ocfs2_start_trans(OCFS2_SB(t_inode->i_sb),
OCFS2_INODE_UPDATE_CREDITS); if (IS_ERR(handle)) {
ret = PTR_ERR(handle);
mlog_errno(ret); return ret;
}
ret = ocfs2_journal_access_di(handle, INODE_CACHE(t_inode), t_bh,
OCFS2_JOURNAL_ACCESS_WRITE); if (ret) {
mlog_errno(ret); goto out_commit;
}
if (oi->ip_flags & OCFS2_INODE_SYSTEM_FILE) {
ret = -EINVAL;
mlog_errno(ret); goto out;
}
ret = filemap_fdatawrite(inode->i_mapping); if (ret) {
mlog_errno(ret); goto out;
}
ret = ocfs2_attach_refcount_tree(inode, old_bh); if (ret) {
mlog_errno(ret); goto out;
}
inode_lock_nested(new_inode, I_MUTEX_CHILD);
ret = ocfs2_inode_lock_nested(new_inode, &new_bh, 1,
OI_LS_REFLINK_TARGET); if (ret) {
mlog_errno(ret); goto out_unlock;
}
if ((oi->ip_dyn_features & OCFS2_HAS_XATTR_FL) &&
(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)) { /* * Adjust extent record count to reserve space for extended attribute. * Inline data count had been adjusted in ocfs2_duplicate_inline_data().
*/ struct ocfs2_inode_info *new_oi = OCFS2_I(new_inode);
/* If the security isn't preserved, we need to re-initialize them. */ if (!preserve) {
error = ocfs2_init_security_and_acl(dir, new_orphan_inode,
&new_dentry->d_name); if (error)
mlog_errno(error);
} if (!error) {
error = ocfs2_mv_orphaned_inode_to_new(dir, new_orphan_inode,
new_dentry); if (error)
mlog_errno(error);
}
ocfs2_inode_unlock_tracker(new_orphan_inode, 1, &oh, had_lock);
out: if (new_orphan_inode) { /* * We need to open_unlock the inode no matter whether we * succeed or not, so that other nodes can delete it later.
*/
ocfs2_open_unlock(new_orphan_inode); if (error)
iput(new_orphan_inode);
}
return error;
}
/* * Below here are the bits used by OCFS2_IOC_REFLINK() to fake * sys_reflink(). This will go away when vfs_reflink() exists in * fs/namei.c.
*/
/*
 * Copied from may_create() in the VFS.
 *
 * Decide whether @child may be created inside @dir.
 *
 * Returns -EEXIST when @child is already positive
 * (d_really_is_positive()), -ENOENT when @dir is flagged as a dead
 * directory (IS_DEADDIR()), and otherwise the result of
 * inode_permission() for MAY_WRITE | MAY_EXEC access to @dir.
 */
static inline int ocfs2_may_create(struct inode *dir, struct dentry *child)
{
	if (d_really_is_positive(child))
		return -EEXIST;

	if (IS_DEADDIR(dir))
		return -ENOENT;

	return inode_permission(&nop_mnt_idmap, dir, MAY_WRITE | MAY_EXEC);
}
/**
 * ocfs2_vfs_reflink - Create a reference-counted link
 *
 * @old_dentry:        source dentry + inode
 * @dir:       directory to create the target
 * @new_dentry:        target dentry
 * @preserve:  if true, preserve all file attributes
 *
 * Performs the permission and sanity checks below and, when they all
 * pass, calls ocfs2_reflink() with the source inode locked.
 *
 * Return: 0 on success, otherwise a negative errno:
 *   -ENOENT  @old_dentry has no inode,
 *   -EXDEV   @dir and the source inode live on different super blocks,
 *   -EPERM   the source is append-only, immutable, not a regular file,
 *            or @preserve was requested without ownership/CAP_CHOWN,
 *   or whatever ocfs2_may_create(), inode_permission(),
 *   dquot_initialize() or ocfs2_reflink() report.
 */
static int ocfs2_vfs_reflink(struct dentry *old_dentry, struct inode *dir,
			     struct dentry *new_dentry, bool preserve)
{
	struct inode *inode = d_inode(old_dentry);
	int error;

	if (!inode)
		return -ENOENT;

	error = ocfs2_may_create(dir, new_dentry);
	if (error)
		return error;

	if (dir->i_sb != inode->i_sb)
		return -EXDEV;

	/*
	 * A reflink to an append-only or immutable file cannot be created.
	 */
	if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
		return -EPERM;

	/* Only regular files can be reflinked. */
	if (!S_ISREG(inode->i_mode))
		return -EPERM;

	if (preserve) {
		/*
		 * If the caller wants to preserve ownership, they require
		 * the rights to do so.
		 */
		if (!uid_eq(current_fsuid(), inode->i_uid) &&
		    !capable(CAP_CHOWN))
			return -EPERM;
		if (!in_group_p(inode->i_gid) && !capable(CAP_CHOWN))
			return -EPERM;
	} else {
		/*
		 * If the caller is modifying any aspect of the attributes,
		 * they are not creating a snapshot.  They need read
		 * permission on the file.
		 */
		error = inode_permission(&nop_mnt_idmap, inode, MAY_READ);
		if (error)
			return error;
	}

	/* The source inode stays locked across quota init and the reflink. */
	inode_lock(inode);
	error = dquot_initialize(dir);
	if (!error)
		error = ocfs2_reflink(old_dentry, dir, new_dentry, preserve);
	inode_unlock(inode);

	/* Only announce the new entry when it was actually created. */
	if (!error)
		fsnotify_create(dir, new_dentry);

	return error;
}

/*
 * Most codes are copied from sys_linkat.
*/ int ocfs2_reflink_ioctl(struct inode *inode, constchar __user *oldname, constchar __user *newname, bool preserve)
{ struct dentry *new_dentry; struct path old_path, new_path; int error;
if (!ocfs2_refcount_tree(OCFS2_SB(inode->i_sb))) return -EOPNOTSUPP;
/* * If we're reflinking the entire file and the source is inline * data, just copy the contents.
*/ if (pos_in == pos_out && pos_in == 0 && len == i_size_read(s_inode) &&
i_size_read(t_inode) <= len &&
(OCFS2_I(s_inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL)) {
ret = ocfs2_duplicate_inline_data(s_inode, s_bh, t_inode, t_bh); if (ret)
mlog_errno(ret); goto out;
}
/* * If both inodes belong to two different refcount groups then * forget it because we don't know how (or want) to go merging * refcount trees.
*/
ret = -EOPNOTSUPP; if (ocfs2_is_refcount_inode(s_inode) &&
ocfs2_is_refcount_inode(t_inode) &&
le64_to_cpu(dis->i_refcount_loc) !=
le64_to_cpu(dit->i_refcount_loc)) goto out;
/* Neither inode has a refcount tree. Add one to s_inode. */ if (!ocfs2_is_refcount_inode(s_inode) &&
!ocfs2_is_refcount_inode(t_inode)) {
ret = ocfs2_create_refcount_tree(s_inode, s_bh); if (ret) {
mlog_errno(ret); goto out;
}
}
/* Ensure that both inodes end up with the same refcount tree. */ if (!ocfs2_is_refcount_inode(s_inode)) {
ret = ocfs2_set_refcount_tree(s_inode, s_bh,
le64_to_cpu(dit->i_refcount_loc)); if (ret) {
mlog_errno(ret); goto out;
}
} if (!ocfs2_is_refcount_inode(t_inode)) {
ret = ocfs2_set_refcount_tree(t_inode, t_bh,
le64_to_cpu(dis->i_refcount_loc)); if (ret) {
mlog_errno(ret); goto out;
}
}
/* Turn off inline data in the dest file. */ if (OCFS2_I(t_inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
ret = ocfs2_convert_inline_data_to_extents(t_inode, t_bh); if (ret) {
mlog_errno(ret); goto out;
}
}
/* We always want to lock the one with the lower lockid first. */ if (oi1->ip_blkno > oi2->ip_blkno)
mlog_errno(-ENOLCK);
/* lock id1 */
status = ocfs2_inode_lock_nested(inode1, &bh1, 1,
OI_LS_REFLINK_TARGET); if (status < 0) { if (status != -ENOENT)
mlog_errno(status); goto out_rw2;
}
/* lock id2 */ if (!same_inode) {
status = ocfs2_inode_lock_nested(inode2, &bh2, 1,
OI_LS_REFLINK_TARGET); if (status < 0) { if (status != -ENOENT)
mlog_errno(status); goto out_cl1;
}
} else {
bh2 = bh1;
}
/* * If we swapped inode order above, we have to swap the buffer heads * before passing them back to the caller.
*/ if (need_swap)
swap(bh1, bh2);
*bh_s = bh1;
*bh_t = bh2;
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit noch Richtigkeit
noch Qualität der bereitgestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.