// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
*/
/* * Implements Extendible Hashing as described in: * "Extendible Hashing" by Fagin, et al in * __ACM Trans. on Database Systems__, Sept 1979. * * * Here's the layout of dirents which is essentially the same as that of ext2 * within a single block. The field de_name_len is the number of bytes * actually required for the name (no null terminator). The field de_rec_len * is the number of bytes allocated to the dirent. The offset of the next * dirent in the block is (dirent + dirent->de_rec_len). When a dirent is * deleted, the preceding dirent inherits its allocated space, ie * prev->de_rec_len += deleted->de_rec_len. Since the next dirent is obtained * by adding de_rec_len to the current dirent, this essentially causes the * deleted dirent to get jumped over when iterating through all the dirents. * * When deleting the first dirent in a block, there is no previous dirent so * the field de_ino is set to zero to designate it as deleted. When allocating * a dirent, gfs2_dirent_alloc iterates through the dirents in a block. If the * first dirent has (de_ino == 0) and de_rec_len is large enough, this first * dirent is allocated. Otherwise it must go through all the 'used' dirents * searching for one in which the amount of total space minus the amount of * used space will provide enough space for the new dirent. * * There are two types of blocks in which dirents reside. In a stuffed dinode, * the dirents begin at offset sizeof(struct gfs2_dinode) from the beginning of * the block. In leaves, they begin at offset sizeof(struct gfs2_leaf) from the * beginning of the leaf block. The dirents reside in leaves when * * dip->i_diskflags & GFS2_DIF_EXHASH is true * * Otherwise, the dirents are "linear", within a single stuffed dinode block. * * When the dirents are in leaves, the actual contents of the directory file are * used as an array of 64-bit block pointers pointing to the leaf blocks. The * dirents are NOT in the directory file itself. There can be more than one * block pointer in the array that points to the same leaf. In fact, when a * directory is first converted from linear to exhash, all of the pointers * point to the same leaf. * * When a leaf is completely full, the size of the hash table can be * doubled unless it is already at the maximum size which is hard coded into * GFS2_DIR_MAX_DEPTH. After that, leaves are chained together in a linked list, * but never before the maximum hash table size has been reached.
*/
/** * gfs2_dir_write_data - Write directory information to the inode * @ip: The GFS2 inode * @buf: The buffer containing information to be written * @offset: The file offset to start writing at * @size: The amount of data to write * * Returns: The number of bytes correctly written or error code
*/ staticint gfs2_dir_write_data(struct gfs2_inode *ip, constchar *buf,
u64 offset, unsignedint size)
{ struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); struct buffer_head *dibh;
u64 lblock, dblock;
u32 extlen = 0; unsignedint o; int copied = 0; int error = 0; boolnew = false;
/** * gfs2_dir_read_data - Read a data from a directory inode * @ip: The GFS2 Inode * @buf: The buffer to place result into * @size: Amount of data to transfer * * Returns: The amount of data actually copied or the error
*/ staticint gfs2_dir_read_data(struct gfs2_inode *ip, __be64 *buf, unsignedint size)
{ struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
u64 lblock, dblock;
u32 extlen = 0; unsignedint o; int copied = 0; int error = 0;
if (gfs2_is_stuffed(ip)) return gfs2_dir_read_stuffed(ip, buf, size);
if (gfs2_assert_warn(sdp, gfs2_is_jdata(ip))) return -EINVAL;
lblock = 0;
o = do_div(lblock, sdp->sd_jbsize) + sizeof(struct gfs2_meta_header);
while (copied < size) { unsignedint amount; struct buffer_head *bh;
hc = kmalloc(hsize, GFP_NOFS | __GFP_NOWARN); if (hc == NULL)
hc = __vmalloc(hsize, GFP_NOFS);
if (hc == NULL) return ERR_PTR(-ENOMEM);
ret = gfs2_dir_read_data(ip, hc, hsize); if (ret < 0) {
kvfree(hc); return ERR_PTR(ret);
}
spin_lock(&inode->i_lock); if (likely(!ip->i_hash_cache)) {
ip->i_hash_cache = hc;
hc = NULL;
}
spin_unlock(&inode->i_lock);
kvfree(hc);
return ip->i_hash_cache;
}
/** * gfs2_dir_hash_inval - Invalidate dir hash * @ip: The directory inode * * Must be called with an exclusive glock, or during glock invalidation.
*/ void gfs2_dir_hash_inval(struct gfs2_inode *ip)
{
__be64 *hc;
spin_lock(&ip->i_inode.i_lock);
hc = ip->i_hash_cache;
ip->i_hash_cache = NULL;
spin_unlock(&ip->i_inode.i_lock);
/* Look for the dirent that contains the offset specified in data. Once we
* find that dirent, there must be space available there for the new dirent */ staticint gfs2_dirent_find_offset(conststruct gfs2_dirent *dent, conststruct qstr *name, void *ptr)
{ unsigned required = GFS2_DIRENT_SIZE(name->len); unsigned actual = GFS2_DIRENT_SIZE(be16_to_cpu(dent->de_name_len)); unsigned totlen = be16_to_cpu(dent->de_rec_len);
if (ptr < (void *)dent || ptr >= (void *)dent + totlen) return 0; if (gfs2_dirent_sentinel(dent))
actual = 0; if (ptr < (void *)dent + actual) return -1; if ((void *)dent + totlen >= ptr + required) return 1; return -1;
}
/* * Other possible things to check: * - Inode located within filesystem size (and on valid block) * - Valid directory entry type * Not sure how heavy-weight we want to make this... could also check * hash is correct for example, but that would take a lot of extra time. * For now the most important thing is to check that the various sizes * are correct.
*/ staticint gfs2_check_dirent(struct gfs2_sbd *sdp, struct gfs2_dirent *dent, unsignedint offset, unsignedint size, unsignedint len, int first)
{ constchar *msg = "gfs2_dirent too small"; if (unlikely(size < sizeof(struct gfs2_dirent))) goto error;
msg = "gfs2_dirent misaligned"; if (unlikely(offset & 0x7)) goto error;
msg = "gfs2_dirent points beyond end of block"; if (unlikely(offset + size > len)) goto error;
msg = "zero inode number"; if (unlikely(!first && gfs2_dirent_sentinel(dent))) goto error;
msg = "name length is greater than space in dirent"; if (!gfs2_dirent_sentinel(dent) &&
unlikely(sizeof(struct gfs2_dirent)+be16_to_cpu(dent->de_name_len) >
size)) goto error; return 0;
error:
fs_warn(sdp, "%s: %s (%s)\n",
__func__, msg, first ? "first in block" : "not first in block"); return -EIO;
}
if (unlikely(rec_len < sizeof(struct gfs2_dirent))) {
gfs2_consist_inode(dip); return -EIO;
}
ptr += rec_len; if (ptr < end_p) return rec_len; if (ptr == end_p) return -ENOENT;
gfs2_consist_inode(dip); return -EIO;
}
/** * dirent_next - Next dirent * @dip: the directory * @bh: The buffer * @dent: Pointer to list of dirents * * Returns: 0 on success, error code otherwise
*/
if (gfs2_dirent_sentinel(cur)) {
gfs2_consist_inode(dip); return;
}
gfs2_trans_add_meta(dip->i_gl, bh);
/* If there is no prev entry, this is the first entry in the block. The de_rec_len is already as big as it needs to be. Just zero
out the inode number and return. */
/* * Takes a dent from which to grab space as an argument. Returns the * newly created dent.
*/ staticstruct gfs2_dirent *gfs2_init_dirent(struct inode *inode, struct gfs2_dirent *dent, conststruct qstr *name, struct buffer_head *bh)
{ unsigned offset = 0;
/** * get_leaf_nr - Get a leaf number associated with the index * @dip: The GFS2 inode * @index: hash table index of the targeted leaf * @leaf_out: Resulting leaf block number * * Returns: 0 on success, error code otherwise
*/
/** * dir_make_exhash - Convert a stuffed directory into an ExHash directory * @inode: The directory inode to be converted to exhash * * Returns: 0 on success, error code otherwise
*/
/** * dir_split_leaf - Split a leaf block into two * @inode: The directory inode to be split * @name: name of the dirent we're trying to insert * * Returns: 0 on success, error code on failure
*/
/* Compute the start and len of leaf pointers in the hash table. */
len = BIT(dip->i_depth - be16_to_cpu(oleaf->lf_depth));
half_len = len >> 1; if (!half_len) {
fs_warn(GFS2_SB(inode), "i_depth %u lf_depth %u index %u\n",
dip->i_depth, be16_to_cpu(oleaf->lf_depth), index);
gfs2_consist_inode(dip);
error = -EIO; goto fail_brelse;
}
start = (index & ~(len - 1));
/* Change the pointers. Don't bother distinguishing stuffed from non-stuffed.
This code is complicated enough already. */
lp = kmalloc_array(half_len, sizeof(__be64), GFP_NOFS); if (!lp) {
error = -ENOMEM; goto fail_brelse;
}
/* Change the pointers */ for (x = 0; x < half_len; x++)
lp[x] = cpu_to_be64(bn);
if (hash_a > hash_b)
ret = 1; elseif (hash_a < hash_b)
ret = -1; else { unsignedint len_a = be16_to_cpu(dent_a->de_name_len); unsignedint len_b = be16_to_cpu(dent_b->de_name_len);
if (len_a > len_b)
ret = 1; elseif (len_a < len_b)
ret = -1; else
ret = memcmp(dent_a + 1, dent_b + 1, len_a);
}
return ret;
}
/** * do_filldir_main - read out directory entries * @dip: The GFS2 inode * @ctx: what to feed the entries to * @darr: an array of struct gfs2_dirent pointers to read * @entries: the number of entries in darr * @sort_start: index of the directory array to start our sort * @copied: pointer to int that's non-zero if a entry has been copied out * * Jump through some hoops to make sure that if there are hash collsions, * they are read out at the beginning of a buffer. We want to minimize * the possibility that they will fall into different readdir buffers or * that someone will want to seek to that location. * * Returns: errno, >0 if the actor tells you to stop
*/
/* Increment the ctx->pos by one, so the next time we come into the do_filldir fxn, we get the next entry instead of the last one in the
current leaf */
error = -ENOMEM; /* * The extra 99 entries are not normally used, but are a buffer * zone in case the number of entries in the leaf is corrupt. * 99 is the maximum number of entries that can fit in a single * leaf block.
*/
larr = gfs2_alloc_sort_buffer((leaves + entries + 99) * sizeof(void *)); if (!larr) goto out;
darr = (struct gfs2_dirent **)(larr + leaves);
g.pdent = (conststruct gfs2_dirent **)darr;
g.offset = 0;
lfn = leaf_no;
/** * gfs2_dir_readahead - Issue read-ahead requests for leaf blocks. * @inode: the directory inode * @hsize: hash table size * @index: index into the hash table * @f_ra: read-ahead parameters * * Note: we can't calculate each index like dir_e_read can because we don't * have the leaf, and therefore we don't have the depth, and therefore we * don't have the length. So we have to just read enough ahead to make up * for the loss of information.
*/ staticvoid gfs2_dir_readahead(struct inode *inode, unsigned hsize, u32 index, struct file_ra_state *f_ra)
{ struct gfs2_inode *ip = GFS2_I(inode); struct gfs2_glock *gl = ip->i_gl; struct buffer_head *bh;
u64 blocknr = 0, last; unsigned count;
/* First check if we've already read-ahead for the whole range. */ if (index + MAX_RA_BLOCKS < f_ra->start) return;
f_ra->start = max((pgoff_t)index, f_ra->start); for (count = 0; count < MAX_RA_BLOCKS; count++) { if (f_ra->start >= hsize) /* if exceeded the hash table */ break;
last = blocknr;
blocknr = be64_to_cpu(ip->i_hash_cache[f_ra->start]);
f_ra->start++; if (blocknr == last) continue;
/** * dir_e_read - Reads the entries from a directory into a filldir buffer * @inode: the directory inode * @ctx: actor to feed the entries to * @f_ra: read-ahead parameters * * Returns: errno
*/
if (dip->i_diskflags & GFS2_DIF_EXHASH) return dir_e_read(inode, ctx, f_ra);
if (!gfs2_is_stuffed(dip)) {
gfs2_consist_inode(dip); return -EIO;
}
error = gfs2_meta_inode_buffer(dip, &dibh); if (error) return error;
error = -ENOMEM; /* 96 is max number of dirents which can be stuffed into an inode */
darr = kmalloc_array(96, sizeof(struct gfs2_dirent *), GFP_NOFS); if (darr) {
g.pdent = (conststruct gfs2_dirent **)darr;
g.offset = 0;
dent = gfs2_dirent_scan(inode, dibh->b_data, dibh->b_size,
gfs2_dirent_gather, NULL, &g); if (IS_ERR(dent)) {
error = PTR_ERR(dent); goto out;
} if (dip->i_entries != g.offset) {
fs_warn(sdp, "Number of entries corrupt in dir %llu, " "ip->i_entries (%u) != g.offset (%u)\n",
(unsignedlonglong)dip->i_no_addr,
dip->i_entries,
g.offset);
gfs2_consist_inode(dip);
error = -EIO; goto out;
}
gfs2_set_cookies(sdp, dibh, 0, darr, dip->i_entries);
error = do_filldir_main(dip, ctx, darr,
dip->i_entries, 0, &copied);
out:
kfree(darr);
}
if (error > 0)
error = 0;
brelse(dibh);
return error;
}
/** * gfs2_dir_search - Search a directory * @dir: The GFS2 directory inode * @name: The name we are looking up * @fail_on_exist: Fail if the name exists rather than looking it up * * This routine searches a directory for a file or another directory. * Assumes a glock is held on dip. * * Returns: errno
*/
int gfs2_dir_check(struct inode *dir, conststruct qstr *name, conststruct gfs2_inode *ip)
{ struct buffer_head *bh; struct gfs2_dirent *dent; int ret = -ENOENT;
dent = gfs2_dirent_search(dir, name, gfs2_dirent_find, &bh); if (dent) { if (IS_ERR(dent)) return PTR_ERR(dent); if (ip) { if (be64_to_cpu(dent->de_inum.no_addr) != ip->i_no_addr) goto out; if (be64_to_cpu(dent->de_inum.no_formal_ino) !=
ip->i_no_formal_ino) goto out; if (unlikely(IF2DT(ip->i_inode.i_mode) !=
be16_to_cpu(dent->de_type))) {
gfs2_consist_inode(GFS2_I(dir));
ret = -EIO; goto out;
}
}
ret = 0;
out:
brelse(bh);
} return ret;
}
/** * dir_new_leaf - Add a new leaf onto hash chain * @inode: The directory * @name: The name we are adding * * This adds a new dir leaf onto an existing leaf when there is not * enough space to add a new dir entry. This is a last resort after * we've expanded the hash table to max size and also split existing * leaf blocks, so it will only occur for very large directories. * * The dist parameter is set to 1 for leaf blocks directly attached * to the hash table, 2 for one layer of indirection, 3 for two layers * etc. We are thus able to tell the difference between an old leaf * with dist set to zero (i.e. "don't know") and a new one where we * set this information for debug/fsck purposes. * * Returns: 0 on success, or -ve on error
*/
/** * gfs2_dir_add - Add new filename into directory * @inode: The directory inode * @name: The new name * @nip: The GFS2 inode to be linked in to the directory * @da: The directory addition info * * If the call to gfs2_diradd_alloc_required resulted in there being * no need to allocate any new directory blocks, then it will contain * a pointer to the directory entry and the bh in which it resides. We * can use that without having to repeat the search. If there was no * free space, then we must now create more space. * * Returns: 0 on success, error code on failure
*/
while(1) { if (da->bh == NULL) {
dent = gfs2_dirent_search(inode, name,
gfs2_dirent_find_space, &bh);
} if (dent) { if (IS_ERR(dent)) return PTR_ERR(dent);
dent = gfs2_init_dirent(inode, dent, name, bh);
gfs2_inum_out(nip, dent);
dent->de_type = cpu_to_be16(IF2DT(nip->i_inode.i_mode));
dent->de_rahead = cpu_to_be16(gfs2_inode_ra_len(nip));
tv = inode_set_ctime_current(&ip->i_inode); if (ip->i_diskflags & GFS2_DIF_EXHASH) {
leaf = (struct gfs2_leaf *)bh->b_data;
be16_add_cpu(&leaf->lf_entries, 1);
leaf->lf_nsec = cpu_to_be32(tv.tv_nsec);
leaf->lf_sec = cpu_to_be64(tv.tv_sec);
}
da->dent = NULL;
da->bh = NULL;
brelse(bh);
ip->i_entries++;
inode_set_mtime_to_ts(&ip->i_inode, tv); if (S_ISDIR(nip->i_inode.i_mode))
inc_nlink(&ip->i_inode);
mark_inode_dirty(inode);
error = 0; break;
} if (!(ip->i_diskflags & GFS2_DIF_EXHASH)) {
error = dir_make_exhash(inode); if (error) break; continue;
}
error = dir_split_leaf(inode, name); if (error == 0) continue; if (error < 0) break; if (ip->i_depth < GFS2_DIR_MAX_DEPTH) {
error = dir_double_exhash(ip); if (error) break;
error = dir_split_leaf(inode, name); if (error < 0) break; if (error == 0) continue;
}
error = dir_new_leaf(inode, name); if (!error) continue;
error = -ENOSPC; break;
} return error;
}
/** * gfs2_dir_del - Delete a directory entry * @dip: The GFS2 inode * @dentry: The directory entry we want to delete * * Returns: 0 on success, error code on failure
*/
/* Returns _either_ the entry (if its first in block) or the
previous entry otherwise */
dent = gfs2_dirent_search(&dip->i_inode, name, gfs2_dirent_prev, &bh); if (!dent) {
gfs2_consist_inode(dip); return -EIO;
} if (IS_ERR(dent)) {
gfs2_consist_inode(dip); return PTR_ERR(dent);
} /* If not first in block, adjust pointers accordingly */ if (gfs2_dirent_find(dent, name, NULL) == 0) {
prev = dent;
dent = (struct gfs2_dirent *)((char *)dent + be16_to_cpu(prev->de_rec_len));
}
if (!dip->i_entries)
gfs2_consist_inode(dip);
dip->i_entries--;
inode_set_mtime_to_ts(&dip->i_inode, tv); if (d_is_dir(dentry))
drop_nlink(&dip->i_inode);
mark_inode_dirty(&dip->i_inode);
return 0;
}
/** * gfs2_dir_mvino - Change inode number of directory entry * @dip: The GFS2 directory inode * @filename: the filename to be moved * @nip: the new GFS2 inode * @new_type: the de_type of the new dirent * * This routine changes the inode number of a directory entry. It's used * by rename to change ".." when a directory is moved. * Assumes a glock is held on dvp. * * Returns: errno
*/
/** * leaf_dealloc - Deallocate a directory leaf * @dip: the directory * @index: the hash table offset in the directory * @len: the number of pointers to this leaf * @leaf_no: the leaf number * @leaf_bh: buffer_head for the starting leaf * @last_dealloc: 1 if this is the final dealloc for the leaf, else 0 * * Returns: errno
*/
error = gfs2_dir_write_data(dip, ht, index * sizeof(u64), size); if (error != size) { if (error >= 0)
error = -EIO; goto out_end_trans;
}
error = gfs2_meta_inode_buffer(dip, &dibh); if (error) goto out_end_trans;
gfs2_trans_add_meta(dip->i_gl, dibh); /* On the last dealloc, make this a regular file in case we crash.
(We don't want to free these blocks a second time.) */ if (last_dealloc)
dip->i_inode.i_mode = S_IFREG;
gfs2_dinode_out(dip, dibh->b_data);
brelse(dibh);
/** * gfs2_dir_exhash_dealloc - free all the leaf blocks in a directory * @dip: the directory * * Dealloc all on-disk directory leaves to FREEMETA state * Change on-disk inode type to "regular file" * * Returns: errno
*/
int gfs2_dir_exhash_dealloc(struct gfs2_inode *dip)
{ struct buffer_head *bh; struct gfs2_leaf *leaf;
u32 hsize, len;
u32 index = 0, next_index;
__be64 *lp;
u64 leaf_no; int error = 0, last;
hsize = BIT(dip->i_depth);
lp = gfs2_dir_get_hash_table(dip); if (IS_ERR(lp)) return PTR_ERR(lp);
while (index < hsize) {
leaf_no = be64_to_cpu(lp[index]); if (leaf_no) {
error = get_leaf(dip, leaf_no, &bh); if (error) goto out;
leaf = (struct gfs2_leaf *)bh->b_data;
len = BIT(dip->i_depth - be16_to_cpu(leaf->lf_depth));
if (index != hsize) {
gfs2_consist_inode(dip);
error = -EIO;
}
out:
return error;
}
/** * gfs2_diradd_alloc_required - find if adding entry will require an allocation * @inode: the directory inode being written to * @name: the filename that's going to be added * @da: The structure to return dir alloc info * * Returns: 0 if ok, -ve on error
*/
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereit gestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.