/* * ialloc.c contains the inodes allocation and deallocation routines
*/
/* * The free inodes are managed by bitmaps. A file system contains several * blocks groups. Each group contains 1 bitmap block for blocks, 1 bitmap * block for inodes, N blocks for the inode table and data blocks. * * The file system contains group descriptors which are located after the * super block. Each descriptor contains the number of the bitmap block and * the free blocks count in the block.
*/
/* * Read the inode allocation bitmap for a given block_group, reading * into the specified slot in the superblock's bitmap cache. * * Return buffer_head of bitmap on success or NULL.
*/ staticstruct buffer_head *
read_inode_bitmap(struct super_block * sb, unsignedlong block_group)
{ struct ext2_group_desc *desc; struct buffer_head *bh = NULL;
desc = ext2_get_group_desc(sb, block_group, NULL); if (!desc) goto error_out;
staticvoid ext2_release_inode(struct super_block *sb, int group, int dir)
{ struct ext2_group_desc * desc; struct buffer_head *bh;
desc = ext2_get_group_desc(sb, group, &bh); if (!desc) {
ext2_error(sb, "ext2_release_inode", "can't get descriptor for group %d", group); return;
}
spin_lock(sb_bgl_lock(EXT2_SB(sb), group));
le16_add_cpu(&desc->bg_free_inodes_count, 1); if (dir)
le16_add_cpu(&desc->bg_used_dirs_count, -1);
spin_unlock(sb_bgl_lock(EXT2_SB(sb), group));
percpu_counter_inc(&EXT2_SB(sb)->s_freeinodes_counter); if (dir)
percpu_counter_dec(&EXT2_SB(sb)->s_dirs_counter);
mark_buffer_dirty(bh);
}
/* * NOTE! When we get the inode, we're the only people * that have access to it, and as such there are no * race conditions we have to worry about. The inode * is not on the hash-lists, and it cannot be reached * through the filesystem because the directory entry * has been deleted earlier. * * HOWEVER: we must make sure that we get no aliases, * which means that we have to call "clear_inode()" * _before_ we mark the inode not in use in the inode * bitmaps. Otherwise a newly created file might use * the same inode number (not actually the same pointer * though), and then we'd have two inodes sharing the * same inode number and space on the harddisk.
*/ void ext2_free_inode (struct inode * inode)
{ struct super_block * sb = inode->i_sb; int is_directory; unsignedlong ino; struct buffer_head *bitmap_bh; unsignedlong block_group; unsignedlong bit; struct ext2_super_block * es;
/* * Note: we must free any quota before locking the superblock, * as writing the quota to disk may need the lock as well.
*/ /* Quota is already initialized in iput() */
dquot_free_inode(inode);
dquot_drop(inode);
es = EXT2_SB(sb)->s_es;
is_directory = S_ISDIR(inode->i_mode);
/* Ok, now we can actually update the inode bitmaps.. */ if (!ext2_clear_bit_atomic(sb_bgl_lock(EXT2_SB(sb), block_group),
bit, (void *) bitmap_bh->b_data))
ext2_error (sb, "ext2_free_inode", "bit already cleared for inode %lu", ino); else
ext2_release_inode(sb, block_group, is_directory);
mark_buffer_dirty(bitmap_bh); if (sb->s_flags & SB_SYNCHRONOUS)
sync_dirty_buffer(bitmap_bh);
brelse(bitmap_bh);
}
/* * We perform asynchronous prereading of the new inode's inode block when * we create the inode, in the expectation that the inode will be written * back soon. There are two reasons: * * - When creating a large number of files, the async prereads will be * nicely merged into large reads * - When writing out a large number of inodes, we don't need to keep on * stalling the writes while we read the inode block. * * FIXME: ext2_get_group_desc() needs to be simplified.
*/ staticvoid ext2_preread_inode(struct inode *inode)
{ unsignedlong block_group; unsignedlong offset; unsignedlong block; struct ext2_group_desc * gdp;
block_group = (inode->i_ino - 1) / EXT2_INODES_PER_GROUP(inode->i_sb);
gdp = ext2_get_group_desc(inode->i_sb, block_group, NULL); if (gdp == NULL) return;
/* * Figure out the offset within the block group inode table
*/
offset = ((inode->i_ino - 1) % EXT2_INODES_PER_GROUP(inode->i_sb)) *
EXT2_INODE_SIZE(inode->i_sb);
block = le32_to_cpu(gdp->bg_inode_table) +
(offset >> EXT2_BLOCK_SIZE_BITS(inode->i_sb));
sb_breadahead(inode->i_sb, block);
}
/* * There are two policies for allocating an inode. If the new inode is * a directory, then a forward search is made for a block group with both * free space and a low directory-to-inode ratio; if that fails, then of * the groups with above-average free space, that group with the fewest * directories already is chosen. * * For other inodes, search forward from the parent directory\'s block * group to find a free inode.
*/ staticint find_group_dir(struct super_block *sb, struct inode *parent)
{ int ngroups = EXT2_SB(sb)->s_groups_count; int avefreei = ext2_count_free_inodes(sb) / ngroups; struct ext2_group_desc *desc, *best_desc = NULL; int group, best_group = -1;
for (group = 0; group < ngroups; group++) {
desc = ext2_get_group_desc (sb, group, NULL); if (!desc || !desc->bg_free_inodes_count) continue; if (le16_to_cpu(desc->bg_free_inodes_count) < avefreei) continue; if (!best_desc ||
(le16_to_cpu(desc->bg_free_blocks_count) >
le16_to_cpu(best_desc->bg_free_blocks_count))) {
best_group = group;
best_desc = desc;
}
}
return best_group;
}
/* * Orlov's allocator for directories. * * We always try to spread first-level directories. * * If there are blockgroups with both free inodes and free blocks counts * not worse than average we return one with smallest directory count. * Otherwise we simply return a random group. * * For the rest rules look so: * * It's OK to put directory into a group unless * it has too many directories already (max_dirs) or * it has too few free inodes left (min_inodes) or * it has too few free blocks left (min_blocks) or * it's already running too large debt (max_debt). * Parent's group is preferred, if it doesn't satisfy these * conditions we search cyclically through the rest. If none * of the groups look good we just look for a group with more * free inodes than average (starting at parent's group). * * Debt is incremented each time we allocate a directory and decremented * when we allocate an inode, within 0--255.
*/
#define INODE_COST 64 #define BLOCK_COST 256
staticint find_group_orlov(struct super_block *sb, struct inode *parent)
{ int parent_group = EXT2_I(parent)->i_block_group; struct ext2_sb_info *sbi = EXT2_SB(sb); struct ext2_super_block *es = sbi->s_es; int ngroups = sbi->s_groups_count; int inodes_per_group = EXT2_INODES_PER_GROUP(sb); int freei; int avefreei; int free_blocks; int avefreeb; int blocks_per_dir; int ndirs; int max_debt, max_dirs, min_blocks, min_inodes; int group = -1, i; struct ext2_group_desc *desc;
for (i = 0; i < ngroups; i++) {
group = (parent_group + i) % ngroups;
desc = ext2_get_group_desc (sb, group, NULL); if (!desc || !desc->bg_free_inodes_count) continue; if (sbi->s_debts[group] >= max_debt) continue; if (le16_to_cpu(desc->bg_used_dirs_count) >= max_dirs) continue; if (le16_to_cpu(desc->bg_free_inodes_count) < min_inodes) continue; if (le16_to_cpu(desc->bg_free_blocks_count) < min_blocks) continue; goto found;
}
fallback: for (i = 0; i < ngroups; i++) {
group = (parent_group + i) % ngroups;
desc = ext2_get_group_desc (sb, group, NULL); if (!desc || !desc->bg_free_inodes_count) continue; if (le16_to_cpu(desc->bg_free_inodes_count) >= avefreei) goto found;
}
if (avefreei) { /* * The free-inodes counter is approximate, and for really small * filesystems the above test can fail to find any blockgroups
*/
avefreei = 0; goto fallback;
}
return -1;
found: return group;
}
staticint find_group_other(struct super_block *sb, struct inode *parent)
{ int parent_group = EXT2_I(parent)->i_block_group; int ngroups = EXT2_SB(sb)->s_groups_count; struct ext2_group_desc *desc; int group, i;
/* * Try to place the inode in its parent directory
*/
group = parent_group;
desc = ext2_get_group_desc (sb, group, NULL); if (desc && le16_to_cpu(desc->bg_free_inodes_count) &&
le16_to_cpu(desc->bg_free_blocks_count)) goto found;
/* * We're going to place this inode in a different blockgroup from its * parent. We want to cause files in a common directory to all land in * the same blockgroup. But we want files which are in a different * directory which shares a blockgroup with our parent to land in a * different blockgroup. * * So add our directory's i_ino into the starting point for the hash.
*/
group = (group + parent->i_ino) % ngroups;
/* * Use a quadratic hash to find a group with a free inode and some * free blocks.
*/ for (i = 1; i < ngroups; i <<= 1) {
group += i; if (group >= ngroups)
group -= ngroups;
desc = ext2_get_group_desc (sb, group, NULL); if (desc && le16_to_cpu(desc->bg_free_inodes_count) &&
le16_to_cpu(desc->bg_free_blocks_count)) goto found;
}
/* * That failed: try linear search for a free inode, even if that group * has no free blocks.
*/
group = parent_group; for (i = 0; i < ngroups; i++) { if (++group >= ngroups)
group = 0;
desc = ext2_get_group_desc (sb, group, NULL); if (desc && le16_to_cpu(desc->bg_free_inodes_count)) goto found;
}
sb = dir->i_sb;
inode = new_inode(sb); if (!inode) return ERR_PTR(-ENOMEM);
ei = EXT2_I(inode);
sbi = EXT2_SB(sb);
es = sbi->s_es; if (S_ISDIR(mode)) { if (test_opt(sb, OLDALLOC))
group = find_group_dir(sb, dir); else
group = find_group_orlov(sb, dir);
} else
group = find_group_other(sb, dir);
if (group == -1) {
err = -ENOSPC; goto fail;
}
for (i = 0; i < sbi->s_groups_count; i++) {
gdp = ext2_get_group_desc(sb, group, &bh2); if (!gdp) { if (++group == sbi->s_groups_count)
group = 0; continue;
}
brelse(bitmap_bh);
bitmap_bh = read_inode_bitmap(sb, group); if (!bitmap_bh) {
err = -EIO; goto fail;
}
ino = 0;
repeat_in_this_group:
ino = ext2_find_next_zero_bit((unsignedlong *)bitmap_bh->b_data,
EXT2_INODES_PER_GROUP(sb), ino); if (ino >= EXT2_INODES_PER_GROUP(sb)) { /* * Rare race: find_group_xx() decided that there were * free inodes in this group, but by the time we tried * to allocate one, they're all gone. This can also * occur because the counters which find_group_orlov() * uses are approximate. So just go and search the * next block group.
*/ if (++group == sbi->s_groups_count)
group = 0; continue;
} if (ext2_set_bit_atomic(sb_bgl_lock(sbi, group),
ino, bitmap_bh->b_data)) { /* we lost this inode */ if (++ino >= EXT2_INODES_PER_GROUP(sb)) { /* this group is exhausted, try next group */ if (++group == sbi->s_groups_count)
group = 0; continue;
} /* try to find free inode in the same group */ goto repeat_in_this_group;
} goto got;
}
/* * Scanned all blockgroups.
*/
brelse(bitmap_bh);
err = -ENOSPC; goto fail;
got:
mark_buffer_dirty(bitmap_bh); if (sb->s_flags & SB_SYNCHRONOUS)
sync_dirty_buffer(bitmap_bh);
brelse(bitmap_bh);
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereit gestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.