// SPDX-License-Identifier: GPL-2.0-or-later /* * Copyright (C) International Business Machines Corp., 2000-2004
*/
/* * jfs_dtree.c: directory B+-tree manager * * B+-tree with variable length key directory: * * each directory page is structured as an array of 32-byte * directory entry slots initialized as a freelist * to avoid search/compaction of free space at insertion. * when an entry is inserted, a number of slots are allocated * from the freelist as required to store variable length data * of the entry; when the entry is deleted, slots of the entry * are returned to freelist. * * leaf entry stores full name as key and file serial number * (aka inode number) as data. * internal/router entry stores sufffix compressed name * as key and simple extent descriptor as data. * * each directory page maintains a sorted entry index table * which stores the start slot index of sorted entries * to allow binary search on the table. * * directory starts as a root/leaf page in on-disk inode * inline data area. * when it becomes full, it starts a leaf of a external extent * of length of 1 block. each time the first leaf becomes full, * it is extended rather than split (its size is doubled), * until its length becoms 4 KBytes, from then the extent is split * with new 4 Kbyte extent when it becomes full * to reduce external fragmentation of small directories. * * blah, blah, blah, for linear scan of directory in pieces by * readdir(). * * * case-insensitive directory file system * * names are stored in case-sensitive way in leaf entry. * but stored, searched and compared in case-insensitive (uppercase) order * (i.e., both search key and entry key are folded for search/compare): * (note that case-sensitive order is BROKEN in storage, e.g., * sensitive: Ad, aB, aC, aD -> insensitive: aB, aC, aD, Ad * * entries which folds to the same key makes up a equivalent class * whose members are stored as contiguous cluster (may cross page boundary) * but whose order is arbitrary and acts as duplicate, e.g., * abc, Abc, aBc, abC) * * once match is found at leaf, requires scan forward/backward * either for, in case-insensitive search, duplicate * or for, in case-sensitive search, for exact match * * router entry must be created/stored in case-insensitive way * in internal entry: * (right most key of left page and left most key of right page * are folded, and its suffix compression is propagated as router * key in parent) * (e.g., if split occurs <abc> and <aBd>, <ABD> trather than <aB> * should be made the router key for the split) * * case-insensitive search: * * fold search key; * * case-insensitive search of B-tree: * for internal entry, router key is already folded; * for leaf entry, fold the entry key before comparison. * * if (leaf entry case-insensitive match found) * if (next entry satisfies case-insensitive match) * return EDUPLICATE; * if (prev entry satisfies case-insensitive match) * return EDUPLICATE; * return match; * else * return no match; * * serialization: * target directory inode lock is being held on entry/exit * of all main directory service routines. * * log based recovery:
*/
staticvoid dtMoveEntry(dtpage_t * sp, int si, dtpage_t * dp, struct dt_lock ** sdtlock, struct dt_lock ** ddtlock, int do_index);
staticvoid dtDeleteEntry(dtpage_t * p, int fi, struct dt_lock ** dtlock);
staticvoid dtTruncateEntry(dtpage_t * p, int ti, struct dt_lock ** dtlock);
staticvoid dtLinelockFreelist(dtpage_t * p, int m, struct dt_lock ** dtlock);
#define ciToUpper(c) UniStrupr((c)->name)
/* * read_index_page() * * Reads a page of a directory's index table. * Having metadata mapped into the directory inode's address space * presents a multitude of problems. We avoid this by mapping to * the absolute address space outside of the *_metapage routines
*/ staticstruct metapage *read_index_page(struct inode *inode, s64 blkno)
{ int rc;
s64 xaddr; int xflag;
s32 xlen;
/* * get_index_page() * * Same as get_index_page(), but get's a new page without reading
*/ staticstruct metapage *get_index_page(struct inode *inode, s64 blkno)
{ int rc;
s64 xaddr; int xflag;
s32 xlen;
/* * It's time to move the inline table to an external * page and begin to build the xtree
*/ if (dquot_alloc_block(ip, sbi->nbperpage)) goto clean_up; if (dbAlloc(ip, 0, sbi->nbperpage, &xaddr)) {
dquot_free_block(ip, sbi->nbperpage); goto clean_up;
}
/* * Save the table, we're going to overwrite it with the * xtree root
*/
memcpy(temp_table, &jfs_ip->i_dirtable, sizeof(temp_table));
/* only uppercase if case-insensitive support is on */ if ((JFS_SBI(sb)->mntflag & JFS_OS2) == JFS_OS2) {
ciToUpper(&ciKey);
}
BT_CLR(btstack); /* reset stack */
/* init level count for max pages to split */
btstack->nsplit = 1;
/* * search down tree from root: * * between two consecutive entries of <Ki, Pi> and <Kj, Pj> of * internal page, child page Pi contains entry with k, Ki <= K < Kj. * * if entry with search key K is not found * internal page search find the entry with largest key Ki * less than K which point to the child page to search; * leaf page search find the entry with smallest key Kj * greater than K so that the returned index is the position of * the entry to be shifted right for insertion of new entry. * for empty tree, search key is greater than any key of the tree. * * by convention, root bn = 0.
*/ for (bn = 0;;) { /* get/pin the page to search */
DT_GETPAGE(ip, bn, mp, psize, p, rc); if (rc) goto dtSearch_Exit1;
/* get sorted entry table of the page */
stbl = DT_GETSTBL(p);
/* * binary search with search key K on the current page.
*/ for (base = 0, lim = p->header.nextindex; lim; lim >>= 1) {
index = base + (lim >> 1);
if (stbl[index] < 0) {
rc = -EIO; goto out;
}
if (p->header.flag & BT_LEAF) { /* uppercase leaf name to compare */
cmp =
ciCompare(&ciKey, p, stbl[index],
JFS_SBI(sb)->mntflag);
} else { /* router key is in uppercase */
cmp = dtCompare(&ciKey, p, stbl[index]);
} if (cmp == 0) { /* * search hit
*/ /* search hit - leaf page: * return the entry found
*/ if (p->header.flag & BT_LEAF) {
inumber = le32_to_cpu(
((struct ldtentry *) & p->slot[stbl[index]])->inumber);
/* * search for JFS_LOOKUP
*/ if (flag == JFS_LOOKUP) {
*data = inumber;
rc = 0; goto out;
}
/* * search for JFS_CREATE
*/ if (flag == JFS_CREATE) {
*data = inumber;
rc = -EEXIST; goto out;
}
/* * search for JFS_REMOVE or JFS_RENAME
*/ if ((flag == JFS_REMOVE ||
flag == JFS_RENAME) &&
*data != inumber) {
rc = -ESTALE; goto out;
}
/* search hit - internal page: * descend/search its child page
*/ goto getChild;
}
if (cmp > 0) {
base = index + 1;
--lim;
}
}
/* * search miss * * base is the smallest index with key (Kj) greater than * search key (K) and may be zero or (maxindex + 1) index.
*/ /* * search miss - leaf page * * return location of entry (base) where new entry with * search key K is to be inserted.
*/ if (p->header.flag & BT_LEAF) { /* * search for JFS_LOOKUP, JFS_REMOVE, or JFS_RENAME
*/ if (flag == JFS_LOOKUP || flag == JFS_REMOVE ||
flag == JFS_RENAME) {
rc = -ENOENT; goto out;
}
/* * search for JFS_CREATE|JFS_FINDDIR: * * save search result
*/
*data = 0;
btsp = btstack->top;
btsp->bn = bn;
btsp->index = base;
btsp->mp = mp;
rc = 0; goto dtSearch_Exit1;
}
/* * search miss - internal page * * if base is non-zero, decrement base by one to get the parent * entry of the child page to search.
*/
index = base ? base - 1 : base;
/* * go down to child page
*/
getChild: /* update max. number of pages to split */ if (BT_STACK_FULL(btstack)) { /* Something's corrupted, mark filesystem dirty so * chkdsk will fix it.
*/
jfs_error(sb, "stack overrun!\n");
BT_STACK_DUMP(btstack);
rc = -EIO; goto out;
}
btstack->nsplit++;
/* push (bn, index) of the parent page/entry */
BT_PUSH(btstack, bn, index);
/* get the child page block number */
pxd = (pxd_t *) & p->slot[stbl[index]];
bn = addressPXD(pxd);
psize = lengthPXD(pxd) << JFS_SBI(ip->i_sb)->l2bsize;
/* unpin the parent page */
DT_PUTPAGE(mp);
}
out:
DT_PUTPAGE(mp);
dtSearch_Exit1:
kfree(ciKey.name);
dtSearch_Exit2:
return rc;
}
/* * dtInsert() * * function: insert an entry to directory tree * * parameter: * * return: 0 - success; * errno - failure;
*/ int dtInsert(tid_t tid, struct inode *ip, struct component_name * name, ino_t * fsn, struct btstack * btstack)
{ int rc = 0; struct metapage *mp; /* meta-page buffer */
dtpage_t *p; /* base B+-tree index page */
s64 bn; int index; struct dtsplit split; /* split information */
ddata_t data; struct dt_lock *dtlck; int n; struct tlock *tlck; struct lv *lv;
/* * retrieve search result * * dtSearch() returns (leaf page pinned, index at which to insert). * n.b. dtSearch() may return index of (maxindex + 1) of * the full page.
*/
DT_GETSEARCH(ip, btstack->top, bn, mp, p, index); if (p->header.freelist == 0) return -EINVAL;
/* * insert entry for new key
*/ if (DO_INDEX(ip)) { if (JFS_IP(ip)->next_index == DIREND) {
DT_PUTPAGE(mp); return -EMLINK;
}
n = NDTLEAF(name->namlen);
data.leaf.tid = tid;
data.leaf.ip = ip;
} else {
n = NDTLEAF_LEGACY(name->namlen);
data.leaf.ip = NULL; /* signifies legacy directory format */
}
data.leaf.ino = *fsn;
/* * leaf page does not have enough room for new entry: * * extend/split the leaf page; * * dtSplitUp() will insert the entry and unpin the leaf page.
*/ if (n > p->header.freecnt) {
split.mp = mp;
split.index = index;
split.nslot = n;
split.key = name;
split.data = &data;
rc = dtSplitUp(tid, ip, &split, btstack); return rc;
}
/* * leaf page does have enough room for new entry: * * insert the new data entry into the leaf page;
*/
BT_MARK_DIRTY(mp, ip); /* * acquire a transaction lock on the leaf page
*/
tlck = txLock(tid, ip, mp, tlckDTREE | tlckENTRY);
dtlck = (struct dt_lock *) & tlck->lock;
ASSERT(dtlck->index == 0);
lv = & dtlck->lv[0];
/* * propagate up the router entry for the leaf page just split * * insert a router entry for the new page into the parent page, * propagate the insert/split up the tree by walking back the stack * of (bn of parent page, index of child page entry in parent page) * that were traversed during the search for the page that split. * * the propagation of insert/split up the tree stops if the root * splits or the page inserted into doesn't have to split to hold * the new entry. * * the parent entry for the split page remains the same, and * a new entry is inserted at its right with the first key and * block number of the new right page. * * There are a maximum of 4 pages pinned at any time: * two children, left parent and right parent (when the parent splits). * keep the child pages pinned while working on the parent. * make sure that all pins are released at exit.
*/ while ((parent = BT_POP(btstack)) != NULL) { /* parent page specified by stack frame <parent> */
/* * insert router entry in parent for new right child page <rp>
*/ /* get the parent page <sp> */
DT_GETPAGE(ip, parent->bn, smp, PSIZE, sp, rc); if (rc) {
DT_PUTPAGE(lmp);
DT_PUTPAGE(rmp); goto splitOut;
}
/* * The new key entry goes ONE AFTER the index of parent entry, * because the split was to the right.
*/
skip = parent->index + 1;
/* * compute the key for the router entry * * key suffix compression: * for internal pages that have leaf pages as children, * retain only what's needed to distinguish between * the new entry and the entry on the page to its left. * If the keys compare equal, retain the entire key. * * note that compression is performed only at computing * router key at the lowest internal level. * further compression of the key between pairs of higher * level internal pages loses too much information and * the search may fail. * (e.g., two adjacent leaf pages of {a, ..., x} {xx, ...,} * results in two adjacent parent entries (a)(xx). * if split occurs between these two entries, and * if compression is applied, the router key of parent entry * of right page (x) will divert search for x into right * subtree and miss x in the left subtree.) * * the entire key must be retained for the next-to-leftmost * internal key at any level of the tree, or search may fail * (e.g., ?)
*/ switch (rp->header.flag & BT_TYPE) { case BT_LEAF: /* * compute the length of prefix for suffix compression * between last entry of left page and first entry * of right page
*/ if ((sp->header.flag & BT_ROOT && skip > 1) ||
sp->header.prev != 0 || skip > 1) { /* compute uppercase router prefix key */
rc = ciGetLeafPrefixKey(lp,
lp->header.nextindex-1,
rp, 0, &key,
sbi->mntflag); if (rc) {
DT_PUTPAGE(lmp);
DT_PUTPAGE(rmp);
DT_PUTPAGE(smp); goto splitOut;
}
} else { /* next to leftmost entry of
lowest internal level */
/* * sequential append at tail: append without split * * If splitting the last page on a level because of appending * a entry to it (skip is maxentry), it's likely that the access is * sequential. Adding an empty page on the side of the level is less * work and can push the fill factor much higher than normal. * If we're wrong it's no big deal, we'll just do the split the right * way next time. * (It may look like it's equally easy to do a similar hack for * reverse sorted data, that is, split the tree left, * but it's not. Be my guest.)
*/ if (nextbn == 0 && split->index == sp->header.nextindex) { /* linelock header + stbl (first slot) of new page */
rlv = & rdtlck->lv[rdtlck->index];
rlv->offset = 0;
rlv->length = 2;
rdtlck->index++;
/* * initialize freelist of new right page
*/
f = &rp->slot[fsi]; for (fsi++; fsi < rp->header.maxslot; f++, fsi++)
f->next = fsi;
f->next = -1;
/* insert entry at the first entry of the new right page */
dtInsertEntry(rp, 0, split->key, split->data, &rdtlck);
/* * update prev pointer of previous right sibling page;
*/ if (nextbn != 0) {
DT_GETPAGE(ip, nextbn, mp, PSIZE, p, rc); if (rc) {
discard_metapage(rmp); return rc;
}
BT_MARK_DIRTY(mp, ip); /* * acquire a transaction lock on the next page
*/
tlck = txLock(tid, ip, mp, tlckDTREE | tlckRELINK);
jfs_info("dtSplitPage: tlck = 0x%p, ip = 0x%p, mp=0x%p",
tlck, ip, mp);
dtlck = (struct dt_lock *) & tlck->lock;
/* linelock header of previous right sibling page */
lv = & dtlck->lv[dtlck->index];
lv->offset = 0;
lv->length = 1;
dtlck->index++;
p->header.prev = cpu_to_le64(rbn);
DT_PUTPAGE(mp);
}
/* * split the data between the split and right pages.
*/
skip = split->index;
half = (PSIZE >> L2DTSLOTSIZE) >> 1; /* swag */
left = 0;
/* * compute fill factor for split pages * * <nxt> traces the next entry to move to rp * <off> traces the next entry to stay in sp
*/
stbl = (u8 *) & sp->slot[sp->header.stblindex];
nextindex = sp->header.nextindex; for (nxt = off = 0; nxt < nextindex; ++off) { if (off == skip) /* check for fill factor with new entry size */
n = split->nslot; else {
si = stbl[nxt]; switch (sp->header.flag & BT_TYPE) { case BT_LEAF:
ldtentry = (struct ldtentry *) & sp->slot[si]; if (DO_INDEX(ip))
n = NDTLEAF(ldtentry->namlen); else
n = NDTLEAF_LEGACY(ldtentry->
namlen); break;
case BT_INTERNAL:
idtentry = (struct idtentry *) & sp->slot[si];
n = NDTINTERNAL(idtentry->namlen); break;
default: break;
}
++nxt; /* advance to next entry to move in sp */
}
left += n; if (left >= half) break;
}
/* <nxt> poins to the 1st entry to move */
/* * move entries to right page * * dtMoveEntry() initializes rp and reserves entry for insertion * * split page moved out entries are linelocked; * new/right page moved in entries are linelocked;
*/ /* linelock header + stbl of new right page */
rlv = & rdtlck->lv[rdtlck->index];
rlv->offset = 0;
rlv->length = 5;
rdtlck->index++;
/* * finalize freelist of new right page
*/
fsi = rp->header.freelist;
f = &rp->slot[fsi]; for (fsi++; fsi < rp->header.maxslot; f++, fsi++)
f->next = fsi;
f->next = -1;
/* * Update directory index table for entries now in right page
*/ if ((rp->header.flag & BT_LEAF) && DO_INDEX(ip)) {
s64 lblock;
mp = NULL;
stbl = DT_GETSTBL(rp); for (n = 0; n < rp->header.nextindex; n++) {
ldtentry = (struct ldtentry *) & rp->slot[stbl[n]];
modify_index(tid, ip, le32_to_cpu(ldtentry->index),
rbn, n, &mp, &lblock);
} if (mp)
release_metapage(mp);
}
/* * the skipped index was on the left page,
*/ if (skip <= off) { /* insert the new entry in the split page */
dtInsertEntry(sp, skip, split->key, split->data, &sdtlck);
/* linelock stbl of split page */ if (sdtlck->index >= sdtlck->maxcnt)
sdtlck = (struct dt_lock *) txLinelock(sdtlck);
slv = & sdtlck->lv[sdtlck->index];
n = skip >> L2DTSLOTSIZE;
slv->offset = sp->header.stblindex + n;
slv->length =
((sp->header.nextindex - 1) >> L2DTSLOTSIZE) - n + 1;
sdtlck->index++;
} /* * the skipped index was on the right page,
*/ else { /* adjust the skip index to reflect the new position */
skip -= nxt;
/* insert the new entry in the right page */
dtInsertEntry(rp, skip, split->key, split->data, &rdtlck);
}
sp->header.maxslot = n;
sp->header.stblindex = newstblindex; /* sp->header.nextindex remains the same */
/* * add old stbl region at head of freelist
*/
fsi = oldstblindex;
f = &sp->slot[fsi];
last = sp->header.freelist; for (n = 0; n < oldstblsize; n++, fsi++, f++) {
f->next = last;
last = fsi;
}
sp->header.freelist = last;
sp->header.freecnt += oldstblsize;
/* * append free region of newly extended area at tail of freelist
*/ /* init free region of newly extended area */
fsi = n = newstblindex + newstblsize;
f = &sp->slot[fsi]; for (fsi++; fsi < sp->header.maxslot; f++, fsi++)
f->next = fsi;
f->next = -1;
/* append new free region at tail of old freelist */
fsi = sp->header.freelist; if (fsi == -1)
sp->header.freelist = n; else { do {
f = &sp->slot[fsi];
fsi = f->next;
} while (fsi != -1);
f->next = n;
}
sp->header.freecnt += sp->header.maxslot - n;
/* * insert the new entry
*/
dtInsertEntry(sp, split->index, split->key, split->data, &dtlck);
BT_MARK_DIRTY(pmp, ip); /* * linelock any freeslots residing in old extent
*/ if (type == tlckEXTEND) {
n = sp->header.maxslot >> 2; if (sp->header.freelist < n)
dtLinelockFreelist(sp, n, &dtlck);
}
/* * update parent entry on the parent/root page
*/ /* * acquire a transaction lock on the parent/root page
*/
tlck = txLock(tid, ip, pmp, tlckDTREE | tlckENTRY);
dtlck = (struct dt_lock *) & tlck->lock;
lv = & dtlck->lv[dtlck->index];
/* * allocate/initialize a single (right) child page * * N.B. at first split, a one (or two) block to fit new entry * is allocated; at subsequent split, a full page is allocated;
*/
pxdlist = split->pxdlist;
pxd = &pxdlist->pxd[pxdlist->npxd];
pxdlist->npxd++;
rbn = addressPXD(pxd);
xlen = lengthPXD(pxd);
xsize = xlen << JFS_SBI(sb)->l2bsize;
rmp = get_metapage(ip, rbn, xsize, 1); if (!rmp) return -EIO;
rp = rmp->data;
/* Allocate blocks to quota. */
rc = dquot_alloc_block(ip, lengthPXD(pxd)); if (rc) {
release_metapage(rmp); return rc;
}
BT_MARK_DIRTY(rmp, ip); /* * acquire a transaction lock on the new right page
*/
tlck = txLock(tid, ip, rmp, tlckDTREE | tlckNEW);
dtlck = (struct dt_lock *) & tlck->lock;
/* copy old stbl to new stbl at start of extended area */
rp->header.stblindex = DTROOTMAXSLOT;
stbl = (s8 *) & rp->slot[DTROOTMAXSLOT];
memcpy(stbl, sp->header.stbl, sp->header.nextindex);
rp->header.nextindex = sp->header.nextindex;
/* copy old data area to start of new data area */
memcpy(&rp->slot[1], &sp->slot[1], IDATASIZE);
/* * append free region of newly extended area at tail of freelist
*/ /* init free region of newly extended area */
fsi = n = DTROOTMAXSLOT + stblsize;
f = &rp->slot[fsi]; for (fsi++; fsi < rp->header.maxslot; f++, fsi++)
f->next = fsi;
f->next = -1;
/* append new free region at tail of old freelist */
fsi = sp->header.freelist; if (fsi == -1)
rp->header.freelist = n; else {
rp->header.freelist = fsi;
do {
f = &rp->slot[fsi];
fsi = f->next;
} while (fsi >= 0);
/* * Update directory index table for entries now in right page
*/ if ((rp->header.flag & BT_LEAF) && DO_INDEX(ip)) {
s64 lblock; struct metapage *mp = NULL; struct ldtentry *ldtentry;
stbl = DT_GETSTBL(rp); for (n = 0; n < rp->header.nextindex; n++) {
ldtentry = (struct ldtentry *) & rp->slot[stbl[n]];
modify_index(tid, ip, le32_to_cpu(ldtentry->index),
rbn, n, &mp, &lblock);
} if (mp)
release_metapage(mp);
} /* * insert the new entry into the new right/child page * (skip index in the new right page will not change)
*/
dtInsertEntry(rp, split->index, split->key, split->data, &dtlck);
/* * reset parent/root page * * set the 1st entry offset to 0, which force the left-most key * at any level of the tree to be less than any search key. * * The btree comparison code guarantees that the left-most key on any * level of the tree is never used, so it doesn't need to be filled in.
*/
BT_MARK_DIRTY(smp, ip); /* * acquire a transaction lock on the root page (in-memory inode)
*/
tlck = txLock(tid, ip, smp, tlckDTREE | tlckNEW | tlckBTROOT);
dtlck = (struct dt_lock *) & tlck->lock;
/* * dtDelete() * * function: delete the entry(s) referenced by a key. * * parameter: * * return:
*/ int dtDelete(tid_t tid, struct inode *ip, struct component_name * key, ino_t * ino, int flag)
{ int rc = 0;
s64 bn; struct metapage *mp, *imp;
dtpage_t *p; int index; struct btstack btstack; struct dt_lock *dtlck; struct tlock *tlck; struct lv *lv; int i; struct ldtentry *ldtentry;
u8 *stbl;
u32 table_index, next_index; struct metapage *nmp;
dtpage_t *np;
/* * search for the entry to delete: * * dtSearch() returns (leaf page pinned, index at which to delete).
*/ if ((rc = dtSearch(ip, key, ino, &btstack, flag))) return rc;
/* * We need to find put the index of the next entry into the * directory index table in order to resume a readdir from this * entry.
*/ if (DO_INDEX(ip)) {
stbl = DT_GETSTBL(p);
ldtentry = (struct ldtentry *) & p->slot[stbl[index]];
table_index = le32_to_cpu(ldtentry->index); if (index == (p->header.nextindex - 1)) { /* * Last entry in this leaf page
*/ if ((p->header.flag & BT_ROOT)
|| (p->header.next == 0))
next_index = -1; else { /* Read next leaf page */
DT_GETPAGE(ip, le64_to_cpu(p->header.next),
nmp, PSIZE, np, rc); if (rc)
next_index = -1; else {
stbl = DT_GETSTBL(np);
ldtentry =
(struct ldtentry *) & np->
slot[stbl[0]];
next_index =
le32_to_cpu(ldtentry->index);
DT_PUTPAGE(nmp);
}
}
} else {
ldtentry =
(struct ldtentry *) & p->slot[stbl[index + 1]];
next_index = le32_to_cpu(ldtentry->index);
}
free_index(tid, ip, table_index, next_index);
} /* * the leaf page becomes empty, delete the page
*/ if (p->header.nextindex == 1) { /* delete empty page */
rc = dtDeleteUp(tid, ip, mp, p, &btstack);
} /* * the leaf page has other entries remaining: * * delete the entry from the leaf page.
*/ else {
BT_MARK_DIRTY(mp, ip); /* * acquire a transaction lock on the leaf page
*/
tlck = txLock(tid, ip, mp, tlckDTREE | tlckENTRY);
dtlck = (struct dt_lock *) & tlck->lock;
/* * Do not assume that dtlck->index will be zero. During a * rename within a directory, this transaction may have * modified this page already when adding the new entry.
*/
/* linelock stbl of non-root leaf page */ if (!(p->header.flag & BT_ROOT)) { if (dtlck->index >= dtlck->maxcnt)
dtlck = (struct dt_lock *) txLinelock(dtlck);
lv = & dtlck->lv[dtlck->index];
i = index >> L2DTSLOTSIZE;
lv->offset = p->header.stblindex + i;
lv->length =
((p->header.nextindex - 1) >> L2DTSLOTSIZE) -
i + 1;
dtlck->index++;
}
/* free the leaf entry */
dtDeleteEntry(p, index, &dtlck);
/* * Update directory index table for entries moved in stbl
*/ if (DO_INDEX(ip) && index < p->header.nextindex) {
s64 lblock;
imp = NULL;
stbl = DT_GETSTBL(p); for (i = index; i < p->header.nextindex; i++) {
ldtentry =
(struct ldtentry *) & p->slot[stbl[i]];
modify_index(tid, ip,
le32_to_cpu(ldtentry->index),
bn, i, &imp, &lblock);
} if (imp)
release_metapage(imp);
}
DT_PUTPAGE(mp);
}
return rc;
}
/* * dtDeleteUp() * * function: * free empty pages as propagating deletion up the tree * * parameter: * * return:
*/ staticint dtDeleteUp(tid_t tid, struct inode *ip, struct metapage * fmp, dtpage_t * fp, struct btstack * btstack)
{ int rc = 0; struct metapage *mp;
dtpage_t *p; int index, nextindex; int xlen; struct btframe *parent; struct dt_lock *dtlck; struct tlock *tlck; struct lv *lv; struct pxd_lock *pxdlock; int i;
/* * keep the root leaf page which has become empty
*/ if (BT_IS_ROOT(fmp)) { /* * reset the root * * dtInitRoot() acquires txlock on the root
*/
dtInitRoot(tid, ip, PARENT(ip));
DT_PUTPAGE(fmp);
return 0;
}
/* * free the non-root leaf page
*/ /* * acquire a transaction lock on the page * * write FREEXTENT|NOREDOPAGE log record * N.B. linelock is overlaid as freed extent descriptor, and * the buffer page is freed;
*/
tlck = txMaplock(tid, ip, tlckDTREE | tlckFREE);
pxdlock = (struct pxd_lock *) & tlck->lock;
pxdlock->flag = mlckFREEPXD;
pxdlock->pxd = fp->header.self;
pxdlock->index = 1;
/* free/invalidate its buffer page */
discard_metapage(fmp);
/* * propagate page deletion up the directory tree * * If the delete from the parent page makes it empty, * continue all the way up the tree. * stop if the root page is reached (which is never deleted) or * if the entry deletion does not empty the page.
*/ while ((parent = BT_POP(btstack)) != NULL) { /* pin the parent page <sp> */
DT_GETPAGE(ip, parent->bn, mp, PSIZE, p, rc); if (rc) return rc;
/* * free the extent of the child page deleted
*/
index = parent->index;
/* * delete the entry for the child page from parent
*/
nextindex = p->header.nextindex;
/* * the parent has the single entry being deleted: * * free the parent page which has become empty.
*/ if (nextindex == 1) { /* * keep the root internal page which has become empty
*/ if (p->header.flag & BT_ROOT) { /* * reset the root * * dtInitRoot() acquires txlock on the root
*/
dtInitRoot(tid, ip, PARENT(ip));
/* * If this was previously an non-empty directory, we need to remove * the old directory table.
*/ if (DO_INDEX(ip)) { if (!jfs_dirtable_inline(ip)) { struct tblock *tblk = tid_to_tblock(tid); /* * We're playing games with the tid's xflag. If * we're removing a regular file, the file's xtree * is committed with COMMIT_PMAP, but we always * commit the directories xtree with COMMIT_PWMAP.
*/
xflag_save = tblk->xflag;
tblk->xflag = 0; /* * xtTruncate isn't guaranteed to fully truncate * the xtree. The caller needs to check i_size * after committing the transaction to see if * additional truncation is needed. The * COMMIT_Stale flag tells caller that we * initiated the truncation.
*/
xtTruncate(tid, ip, 0, COMMIT_PWMAP);
set_cflag(COMMIT_Stale, ip);
/* * add_missing_indices() * * function: Fix dtree page in which one or more entries has an invalid index. * fsck.jfs should really fix this, but it currently does not. * Called from jfs_readdir when bad index is detected.
*/ staticint add_missing_indices(struct inode *inode, s64 bn)
{ struct ldtentry *d; struct dt_lock *dtlck; int i;
uint index; struct lv *lv; struct metapage *mp;
dtpage_t *p; int rc = 0;
s8 *stbl;
tid_t tid; struct tlock *tlck;
/* * Buffer to hold directory entry info while traversing a dtree page * before being fed to the filldir function
*/ struct jfs_dirent {
loff_t position; int ino;
u16 name_len; char name[];
};
/* * jfs_readdir() * * function: read directory entries sequentially * from the specified entry offset * * parameter: * * return: offset = (pn, index) of start entry * of next jfs_readdir()/dtRead()
*/ int jfs_readdir(struct file *file, struct dir_context *ctx)
{ struct inode *ip = file_inode(file); struct nls_table *codepage = JFS_SBI(ip->i_sb)->nls_tab; int rc = 0;
loff_t dtpos; /* legacy OS/2 style position */ struct dtoffset {
s16 pn;
s16 index;
s32 unused;
} *dtoffset = (struct dtoffset *) &dtpos;
s64 bn; struct metapage *mp;
dtpage_t *p; int index;
s8 *stbl; struct btstack btstack; int i, next; struct ldtentry *d; struct dtslot *t; int d_namleft, len, outlen; unsignedlong dirent_buf; char *name_ptr;
u32 dir_index; int do_index = 0;
uint loop_count = 0; struct jfs_dirent *jfs_dirent; int jfs_dirents; int overflow, fix_page, page_fixed = 0; staticint unique_pos = 2; /* If we can't fix broken index */
if (ctx->pos == DIREND) return 0;
if (DO_INDEX(ip)) { /* * persistent index is stored in directory entries. * Special cases: 0 = . * 1 = .. * -1 = End of directory
*/
do_index = 1;
dir_index = (u32) ctx->pos;
/* * NFSv4 reserves cookies 1 and 2 for . and .. so the value * we return to the vfs is one greater than the one we use * internally.
*/ if (dir_index)
dir_index--;
if (dir_index > 1) { struct dir_table_slot dirtab_slot;
if (dtEmpty(ip) ||
(dir_index >= JFS_IP(ip)->next_index)) { /* Stale position. Directory has shrunk */
ctx->pos = DIREND; return 0;
}
repeat:
rc = read_index(ip, dir_index, &dirtab_slot); if (rc) {
ctx->pos = DIREND; return rc;
} if (dirtab_slot.flag == DIR_INDEX_FREE) { if (loop_count++ > JFS_IP(ip)->next_index) {
jfs_err("jfs_readdir detected infinite loop!");
ctx->pos = DIREND; return 0;
}
dir_index = le32_to_cpu(dirtab_slot.addr2); if (dir_index == -1) {
ctx->pos = DIREND; return 0;
} goto repeat;
}
bn = addressDTS(&dirtab_slot);
index = dirtab_slot.slot;
DT_GETPAGE(ip, bn, mp, PSIZE, p, rc); if (rc) {
ctx->pos = DIREND; return 0;
} if (p->header.flag & BT_INTERNAL) {
jfs_err("jfs_readdir: bad index table");
DT_PUTPAGE(mp);
ctx->pos = DIREND; return 0;
}
} else { if (dir_index == 0) { /* * self "."
*/
ctx->pos = 1; if (!dir_emit(ctx, ".", 1, ip->i_ino, DT_DIR)) return 0;
--> --------------------
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereit gestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.