// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *   Copyright (C) International Business Machines Corp., 2000-2004
 *   Portions Copyright (C) Christoph Hellwig, 2001-2002
 */
/*
 *	jfs_logmgr.c: log manager
 *
 * for related information, see transaction manager (jfs_txnmgr.c), and
 * recovery manager (jfs_logredo.c).
 *
 * note: for detail, RTFS.
 *
 *	log buffer manager:
 * special purpose buffer manager supporting log i/o requirements.
 * per log serial pageout of logpage
 * queuing i/o requests and redrive i/o at iodone
 * maintain current logpage buffer
 * no caching since append only
 * appropriate jfs buffer cache buffers as needed
 *
 *	group commit:
 * transactions which wrote COMMIT records in the same in-memory
 * log page during the pageout of previous/current log page(s) are
 * committed together by the pageout of the page.
 *
 *	TBD lazy commit:
 * transactions are committed asynchronously when the log page
 * containing its COMMIT record is paged out when it becomes full;
 *
 *	serialization:
 * . a per log lock serializes log write.
 * . a per log lock serializes group commit.
 * . a per log lock serializes log open/close;
 *
 *	TBD log integrity:
 * careful-write (ping-pong) of last logpage to recover from crash
 * in overwrite.
 * detection of split (out-of-order) write of physical sectors
 * of last logpage via timestamp at end of each sector
 * (with its mirror data array at trailer).
 *
 *	alternatives:
 * lsn - 64-bit monotonically increasing integer vs
 * 32-bit lspn and page eor.
 */
/*
 * LCACHE_SLEEP_COND(wq, cond, flags)
 *
 * Wait on wait queue @wq for @cond.  @cond is tested once up front so
 * that an already-true condition never enters the sleep path; otherwise
 * the wait is delegated to __SLEEP_COND (see jfs_locks.h), which is given
 * LCACHE_LOCK(flags) and LCACHE_UNLOCK(flags) as its lock and unlock
 * commands.
 *
 * NOTE(review): presumably the caller holds the lCache lock on entry and
 * __SLEEP_COND drops/retakes it around the sleep - confirm in jfs_locks.h.
 *
 * Each continuation backslash must be the last character on its line.
 */
#define LCACHE_SLEEP_COND(wq, cond, flags)	\
do {						\
	if (cond)				\
		break;				\
	__SLEEP_COND(wq, cond, LCACHE_LOCK(flags), LCACHE_UNLOCK(flags)); \
} while (0)

/* Wake up the waiters sleeping on wait queue @event. */
#define LCACHE_WAKEUP(event)	wake_up(event)
/*
 *	lbuf buffer cache (lCache) control
 *
 * Every flag is a distinct bit so that flags can be OR-ed together in
 * lbuf->l_flag and in the flag argument of the lbm*() routines.
 * Each #define must sit on its own line: a directive extends to the end
 * of the physical line, so packing several on one line folds the later
 * ones into the first macro's replacement text.
 */
/* log buffer manager pageout control (cumulative, inclusive) */
#define	lbmREAD		0x0001
#define	lbmWRITE	0x0002	/* enqueue at tail of write queue;
				 * init pageout if at head of queue;
				 */
#define	lbmRELEASE	0x0004	/* remove from write queue
				 * at completion of pageout;
				 * do not free/recycle it yet:
				 * caller will free it;
				 */
#define	lbmSYNC		0x0008	/* do not return to freelist
				 * when removed from write queue;
				 */
#define lbmFREE		0x0010	/* return to freelist
				 * at completion of pageout;
				 * the buffer may be recycled;
				 */
#define	lbmDONE		0x0020
#define	lbmERROR	0x0040
#define lbmGC		0x0080	/* lbmIODone to perform post-GC processing
				 * of log page
				 */
#define lbmDIRECT	0x0100
/*
 * Global list of active external journals
 *
 * lmLogOpen() walks this list, with jfs_log_mutex held, looking for a
 * journal whose block device matches the one being mounted.
 */
static LIST_HEAD(jfs_external_logs);

/*
 * Log used for no-integrity mounts; lmLogClose() deliberately leaves it
 * (and its buffers) in memory so a later no-integrity mount can reuse it.
 */
static struct jfs_log *dummy_log;

/* Taken by lmLogOpen() around its walk of jfs_external_logs. */
static DEFINE_MUTEX(jfs_log_mutex);
/* * initialize page lsn if first log write of the page
*/ if (mp->lsn == 0) {
mp->log = log;
mp->lsn = lsn;
log->count++;
/* insert page at tail of logsynclist */
list_add_tail(&mp->synclist, &log->synclist);
}
/* * initialize/update lsn of tblock of the page * * transaction inherits oldest lsn of pages associated * with allocation/deallocation of resources (their * log records are used to reconstruct allocation map * at recovery time: inode for inode allocation map, * B+-tree index of extent descriptors for block * allocation map); * allocation map pages inherit transaction lsn at * commit time to allow forwarding log syncpt past log * records associated with allocation/deallocation of * resources only after persistent map of these map pages * have been updated and propagated to home.
*/ /* * initialize transaction lsn:
*/ if (tblk->lsn == 0) { /* inherit lsn of its first page logged */
tblk->lsn = mp->lsn;
log->count++;
/* insert tblock after the page on logsynclist */
list_add(&tblk->synclist, &mp->synclist);
} /* * update transaction lsn:
*/ else { /* inherit oldest/smallest lsn of page */
logdiff(diffp, mp->lsn, log);
logdiff(difft, tblk->lsn, log); if (diffp < difft) { /* update tblock lsn with page lsn */
tblk->lsn = mp->lsn;
/* move tblock after page on logsynclist */
list_move(&tblk->synclist, &mp->synclist);
}
}
LOGSYNC_UNLOCK(log, flags);
/* * write the log record
*/
writeRecord:
lsn = lmWriteRecord(log, tblk, lrd, tlck);
/* * forward log syncpt if log reached next syncpt trigger
*/
logdiff(diffp, lsn, log); if (diffp >= log->nextsync)
lsn = lmLogSync(log, 0);
/* update end-of-log lsn */
log->lsn = lsn;
LOG_UNLOCK(log);
/* return end-of-log address */ return lsn;
}
/* * NAME: lmWriteRecord() * * FUNCTION: move the log record to current log page * * PARAMETER: cd - commit descriptor * * RETURN: end-of-log address * * serialization: LOG_LOCK() held on entry/exit
*/ staticint
lmWriteRecord(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, struct tlock * tlck)
{ int lsn = 0; /* end-of-log address */ struct lbuf *bp; /* dst log page buffer */ struct logpage *lp; /* dst log page */
caddr_t dst; /* destination address in log page */ int dstoffset; /* end-of-log offset in log page */ int freespace; /* free space in log page */
caddr_t p; /* src meta-data page */
caddr_t src; int srclen; int nbytes; /* number of bytes to move */ int i; int len; struct linelock *linelock; struct lv *lv; struct lvd *lvd; int l2linesize;
/* * enqueue tblock for group commit: * * enqueue tblock of non-trivial/synchronous COMMIT * at tail of group commit queue * (trivial/asynchronous COMMITs are ignored by * group commit.)
*/
LOGGC_LOCK(log);
/* * NAME: lmNextPage() * * FUNCTION: write current page and allocate next page. * * PARAMETER: log * * RETURN: 0 * * serialization: LOG_LOCK() held on entry/exit
*/ staticint lmNextPage(struct jfs_log * log)
{ struct logpage *lp; int lspn; /* log sequence page number */ int pn; /* current page number */ struct lbuf *bp; struct lbuf *nextbp; struct tblock *tblk;
/* get current log page number and log sequence page number */
pn = log->page;
bp = log->bp;
lp = (struct logpage *) bp->l_ldata;
lspn = le32_to_cpu(lp->h.page);
LOGGC_LOCK(log);
/* * write or queue the full page at the tail of write queue
*/ /* get the tail tblk on commit queue */ if (list_empty(&log->cqueue))
tblk = NULL; else
tblk = list_entry(log->cqueue.prev, struct tblock, cqueue);
/* every tblk who has COMMIT record on the current page, * and has not been committed, must be on commit queue * since tblk is queued at commit queueu at the time * of writing its COMMIT record on the page before * page becomes full (even though the tblk thread * who wrote COMMIT record may have been suspended * currently);
*/
/* is page bound with outstanding tail tblk ? */ if (tblk && tblk->pn == pn) { /* mark tblk for end-of-page */
tblk->flag |= tblkGC_EOP;
if (log->cflag & logGC_PAGEOUT) { /* if page is not already on write queue, * just enqueue (no lbmWRITE to prevent redrive) * buffer to wqueue to ensure correct serial order * of the pages since log pages will be added * continuously
*/ if (bp->l_wqnext == NULL)
lbmWrite(log, bp, 0, 0);
} else { /* * No current GC leader, initiate group commit
*/
log->cflag |= logGC_PAGEOUT;
lmGCwrite(log, 0);
}
} /* page is not bound with outstanding tblk: * init write or mark it to be redriven (lbmWRITE)
*/ else { /* finalize the page */
bp->l_ceor = bp->l_eor;
lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_ceor);
lbmWrite(log, bp, lbmWRITE | lbmRELEASE | lbmFREE, 0);
}
LOGGC_UNLOCK(log);
/* * allocate/initialize next page
*/ /* if log wraps, the first data page of log is 2 * (0 never used, 1 is superblock).
*/
log->page = (pn == log->size - 1) ? 2 : pn + 1;
log->eor = LOGPHDRSIZE; /* ? valid page empty/full at logRedo() */
/* * NAME: lmGroupCommit() * * FUNCTION: group commit * initiate pageout of the pages with COMMIT in the order of * page number - redrive pageout of the page at the head of * pageout queue until full page has been written. * * RETURN: * * NOTE: * LOGGC_LOCK serializes log group commit queue, and * transaction blocks on the commit queue. * N.B. LOG_LOCK is NOT held during lmGroupCommit().
*/ int lmGroupCommit(struct jfs_log * log, struct tblock * tblk)
{ int rc = 0;
LOGGC_LOCK(log);
/* group committed already ? */ if (tblk->flag & tblkGC_COMMITTED) { if (tblk->flag & tblkGC_ERROR)
rc = -EIO;
if (tblk->xflag & COMMIT_LAZY)
tblk->flag |= tblkGC_LAZY;
if ((!(log->cflag & logGC_PAGEOUT)) && (!list_empty(&log->cqueue)) &&
(!(tblk->xflag & COMMIT_LAZY) || test_bit(log_FLUSH, &log->flag)
|| jfs_tlocks_low)) { /* * No pageout in progress * * start group commit as its group leader.
*/
log->cflag |= logGC_PAGEOUT;
lmGCwrite(log, 0);
}
if (tblk->xflag & COMMIT_LAZY) { /* * Lazy transactions can leave now
*/
LOGGC_UNLOCK(log); return 0;
}
/* lmGCwrite gives up LOGGC_LOCK, check again */
if (tblk->flag & tblkGC_COMMITTED) { if (tblk->flag & tblkGC_ERROR)
rc = -EIO;
/* removed from commit queue */ if (tblk->flag & tblkGC_ERROR)
rc = -EIO;
LOGGC_UNLOCK(log); return rc;
}
/*
 * NAME:	lmGCwrite()
 *
 * FUNCTION:	group commit write
 *	initiate write of log page, building a group of all transactions
 *	with commit records on that page.
 *
 * PARAMETER:	log		- log whose commit queue is processed
 *		cant_write	- forwarded unchanged to lbmWrite() as its
 *				  last (cant_block) argument
 *
 * RETURN:	None
 *
 * NOTE:
 *	LOGGC_LOCK must be held by caller.
 *	N.B. LOG_LOCK is NOT held during lmGroupCommit().
 *	The commit queue must not be empty: the head entry is read
 *	unconditionally to obtain the group's page number.
 */
static void lmGCwrite(struct jfs_log * log, int cant_write)
{
	struct lbuf *bp;
	struct logpage *lp;
	int gcpn;		/* group commit page number */
	struct tblock *tblk;
	struct tblock *xtblk = NULL;

	/*
	 * build the commit group of a log page
	 *
	 * scan commit queue and make a commit group of all
	 * transactions with COMMIT records on the same log page.
	 */
	/* page number of the tblk at the head of the commit queue */
	gcpn = list_entry(log->cqueue.next, struct tblock, cqueue)->pn;

	list_for_each_entry(tblk, &log->cqueue, cqueue) {
		/* the group ends at the first tblk on a different page */
		if (tblk->pn != gcpn)
			break;

		xtblk = tblk;

		/* state transition: (QUEUE, READY) -> COMMIT */
		tblk->flag |= tblkGC_COMMIT;
	}
	tblk = xtblk;		/* last tblk of the page */

	/*
	 * pageout to commit transactions on the log page.
	 */
	bp = (struct lbuf *) tblk->bp;
	lp = (struct logpage *) bp->l_ldata;

	if (tblk->flag & tblkGC_EOP) {
		/*
		 * page is already full:
		 * mark page to free at end of group commit of the page
		 */
		tblk->flag &= ~tblkGC_EOP;
		tblk->flag |= tblkGC_FREE;

		/* commit everything written to the page so far */
		bp->l_ceor = bp->l_eor;
		lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_ceor);
		lbmWrite(log, bp, lbmWRITE | lbmRELEASE | lbmGC,
			 cant_write);
		INCREMENT(lmStat.full_page);
	} else {
		/*
		 * page is not yet full:
		 * commit only up to the last grouped tblk's end-of-record;
		 * no lbmRELEASE, so the buffer stays on the write queue
		 */
		bp->l_ceor = tblk->eor;	/* ? bp->l_ceor = bp->l_eor; */
		lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_ceor);
		lbmWrite(log, bp, lbmWRITE | lbmGC, cant_write);
		INCREMENT(lmStat.partial_page);
	}
}
/* * NAME: lmPostGC() * * FUNCTION: group commit post-processing * Processes transactions after their commit records have been written * to disk, redriving log I/O if necessary. * * RETURN: None * * NOTE: * This routine is called a interrupt time by lbmIODone
*/ staticvoid lmPostGC(struct lbuf * bp)
{ unsignedlong flags; struct jfs_log *log = bp->l_log; struct logpage *lp; struct tblock *tblk, *temp;
//LOGGC_LOCK(log);
spin_lock_irqsave(&log->gclock, flags); /* * current pageout of group commit completed. * * remove/wakeup transactions from commit queue who were * group committed with the current log page
*/
list_for_each_entry_safe(tblk, temp, &log->cqueue, cqueue) { if (!(tblk->flag & tblkGC_COMMIT)) break; /* if transaction was marked GC_COMMIT then * it has been shipped in the current pageout * and made it to disk - it is committed.
*/
if (bp->l_flag & lbmERROR)
tblk->flag |= tblkGC_ERROR;
/* remove it from the commit queue */
list_del(&tblk->cqueue);
tblk->flag &= ~tblkGC_QUEUE;
if (tblk == log->flush_tblk) { /* we can stop flushing the log now */
clear_bit(log_FLUSH, &log->flag);
log->flush_tblk = NULL;
}
jfs_info("lmPostGC: tblk = 0x%p, flag = 0x%x", tblk,
tblk->flag);
if (!(tblk->xflag & COMMIT_FORCE)) /* * Hand tblk over to lazy commit thread
*/
txLazyUnlock(tblk); else { /* state transition: COMMIT -> COMMITTED */
tblk->flag |= tblkGC_COMMITTED;
if (tblk->flag & tblkGC_READY)
log->gcrtc--;
LOGGC_WAKEUP(tblk);
}
/* was page full before pageout ? * (and this is the last tblk bound with the page)
*/ if (tblk->flag & tblkGC_FREE)
lbmFree(bp); /* did page become full after pageout ? * (and this is the last tblk bound with the page)
*/ elseif (tblk->flag & tblkGC_EOP) { /* finalize the page */
lp = (struct logpage *) bp->l_ldata;
bp->l_ceor = bp->l_eor;
lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_eor);
jfs_info("lmPostGC: calling lbmWrite");
lbmWrite(log, bp, lbmWRITE | lbmRELEASE | lbmFREE,
1);
}
}
/* are there any transactions who have entered lnGroupCommit() * (whose COMMITs are after that of the last log page written. * They are waiting for new group commit (above at (SLEEP 1)) * or lazy transactions are on a full (queued) log page, * select the latest ready transaction as new group leader and * wake her up to lead her group.
*/ if ((!list_empty(&log->cqueue)) &&
((log->gcrtc > 0) || (tblk->bp->l_wqnext != NULL) ||
test_bit(log_FLUSH, &log->flag) || jfs_tlocks_low)) /* * Call lmGCwrite with new group leader
*/
lmGCwrite(log, 1);
/* no transaction are ready yet (transactions are only just * queued (GC_QUEUE) and not entered for group commit yet). * the first transaction entering group commit * will elect herself as new group leader.
*/ else
log->cflag &= ~logGC_PAGEOUT;
/* * NAME: lmLogSync() * * FUNCTION: write log SYNCPT record for specified log * if new sync address is available * (normally the case if sync() is executed by back-ground * process). * calculate new value of i_nextsync which determines when * this code is called again. * * PARAMETERS: log - log structure * hard_sync - 1 to force all metadata to be written * * RETURN: 0 * * serialization: LOG_LOCK() held on entry/exit
*/ staticint lmLogSync(struct jfs_log * log, int hard_sync)
{ int logsize; int written; /* written since last syncpt */ int free; /* free space left available */ int delta; /* additional delta to write normally */ int more; /* additional write granted */ struct lrd lrd; int lsn; struct logsyncblk *lp; unsignedlong flags;
/* push dirty metapages out to disk */ if (hard_sync)
write_special_inodes(log, filemap_fdatawrite); else
write_special_inodes(log, filemap_flush);
/* * forward syncpt
*/ /* if last sync is same as last syncpt, * invoke sync point forward processing to update sync.
*/
logdiff(written, lsn, log);
free = logsize - written;
delta = LOGSYNC_DELTA(logsize);
more = min(free / 2, delta); if (more < 2 * LOGPSIZE) {
jfs_warn("\n ... Log Wrap ... Log Wrap ... Log Wrap ...\n"); /* * log wrapping * * option 1 - panic ? No.! * option 2 - shutdown file systems * associated with log ? * option 3 - extend log ? * option 4 - second chance * * mark log wrapped, and continue. * when all active transactions are completed, * mark log valid for recovery. * if crashed during invalid state, log state * implies invalid log, forcing fsck().
*/ /* mark log state log wrap in log superblock */ /* log->state = LOGWRAP; */
/* reset sync point computation */
log->syncpt = log->sync = lsn;
log->nextsync = delta;
} else /* next syncpt trigger = written + more */
log->nextsync = written + more;
/* if number of bytes written from last sync point is more * than 1/4 of the log size, stop new transactions from * starting until all current transactions are completed * by setting syncbarrier flag.
*/ if (!test_bit(log_SYNCBARRIER, &log->flag) &&
(written > LOGSYNC_BARRIER(logsize)) && log->active) {
set_bit(log_SYNCBARRIER, &log->flag);
jfs_info("log barrier on: lsn=0x%x syncpt=0x%x", lsn,
log->syncpt); /* * We may have to initiate group commit
*/
jfs_flush_journal(log, 0);
}
return lsn;
}
/*
 * NAME:	jfs_syncpt
 *
 * FUNCTION:	write log SYNCPT record for specified log
 *
 *	Takes LOG_LOCK for the duration of the call and invokes
 *	lmLogSync() unless the log_QUIESCE bit is set in log->flag.
 *
 * PARAMETERS:	log	  - log structure
 *		hard_sync - set to 1 to force metadata to be written
 */
void jfs_syncpt(struct jfs_log *log, int hard_sync)
{
	int quiesced;

	LOG_LOCK(log);

	/* a quiesced log gets no new syncpt */
	quiesced = test_bit(log_QUIESCE, &log->flag);
	if (!quiesced)
		lmLogSync(log, hard_sync);

	LOG_UNLOCK(log);
}
/* * NAME: lmLogOpen() * * FUNCTION: open the log on first open; * insert filesystem in the active list of the log. * * PARAMETER: ipmnt - file system mount inode * iplog - log inode (out) * * RETURN: * * serialization:
*/ int lmLogOpen(struct super_block *sb)
{ int rc; struct file *bdev_file; struct jfs_log *log; struct jfs_sb_info *sbi = JFS_SBI(sb);
if (sbi->flag & JFS_NOINTEGRITY) return open_dummy_log(sb);
if (sbi->mntflag & JFS_INLINELOG) return open_inline_log(sb);
mutex_lock(&jfs_log_mutex);
list_for_each_entry(log, &jfs_external_logs, journal_list) { if (file_bdev(log->bdev_file)->bd_dev == sbi->logdev) { if (!uuid_equal(&log->uuid, &sbi->loguuid)) {
jfs_warn("wrong uuid on JFS journal");
mutex_unlock(&jfs_log_mutex); return -EINVAL;
} /* * add file system to log active file system list
*/ if ((rc = lmLogFileSystem(log, sbi, 1))) {
mutex_unlock(&jfs_log_mutex); return rc;
} goto journal_found;
}
}
/* * NAME: lmLogInit() * * FUNCTION: log initialization at first log open. * * logredo() (or logformat()) should have been run previously. * initialize the log from log superblock. * set the log state in the superblock to LOGMOUNT and * write SYNCPT log record. * * PARAMETER: log - log structure * * RETURN: 0 - if ok * -EINVAL - bad log magic number or superblock dirty * error returned from logwait() * * serialization: single first open thread
*/ int lmLogInit(struct jfs_log * log)
{ int rc = 0; struct lrd lrd; struct logsuper *logsuper; struct lbuf *bpsuper; struct lbuf *bp; struct logpage *lp; int lsn = 0;
jfs_info("lmLogInit: log:0x%p", log);
/* initialize the group commit serialization lock */
LOGGC_LOCK_INIT(log);
/* allocate/initialize the log write serialization lock */
LOG_LOCK_INIT(log);
if (!test_bit(log_INLINELOG, &log->flag))
log->l2bsize = L2LOGPSIZE;
/* check for disabled journaling to disk */ if (log->no_integrity) { /* * Journal pages will still be filled. When the time comes * to actually do the I/O, the write is not done, and the * endio routine is called directly.
*/
bp = lbmAllocate(log , 0);
log->bp = bp;
bp->l_pn = bp->l_eor = 0;
} else { /* * validate log superblock
*/ if ((rc = lbmRead(log, 1, &bpsuper))) goto errout10;
logsuper = (struct logsuper *) bpsuper->l_ldata;
if (logsuper->magic != cpu_to_le32(LOGMAGIC)) {
jfs_warn("*** Log Format Error ! ***");
rc = -EINVAL; goto errout20;
}
/* logredo() should have been run successfully. */ if (logsuper->state != cpu_to_le32(LOGREDONE)) {
jfs_warn("*** Log Is Dirty ! ***");
rc = -EINVAL; goto errout20;
}
/* * NAME: lmLogClose() * * FUNCTION: remove file system <ipmnt> from active list of log <iplog> * and close it on last close. * * PARAMETER: sb - superblock * * RETURN: errors from subroutines * * serialization:
*/ int lmLogClose(struct super_block *sb)
{ struct jfs_sb_info *sbi = JFS_SBI(sb); struct jfs_log *log = sbi->log; struct file *bdev_file; int rc = 0;
/* * We need to make sure all of the "written" metapages * actually make it to disk
*/
sync_blockdev(sb->s_bdev);
if (test_bit(log_INLINELOG, &log->flag)) { /* * in-line log in host file system
*/
rc = lmLogShutdown(log);
kfree(log); goto out;
}
if (!log->no_integrity)
lmLogFileSystem(log, sbi, 0);
if (!list_empty(&log->sb_list)) goto out;
/* * TODO: ensure that the dummy_log is in a state to allow * lbmLogShutdown to deallocate all the buffers and call * kfree against dummy_log. For now, leave dummy_log & its * buffers in memory, and resuse if another no-integrity mount * is requested.
*/ if (log->no_integrity) goto out;
/* * external log as separate logical volume
*/
list_del(&log->journal_list);
bdev_file = log->bdev_file;
rc = lmLogShutdown(log);
/* * NAME: jfs_flush_journal() * * FUNCTION: initiate write of any outstanding transactions to the journal * and optionally wait until they are all written to disk * * wait == 0 flush until latest txn is committed, don't wait * wait == 1 flush until latest txn is committed, wait * wait > 1 flush until all txn's are complete, wait
*/ void jfs_flush_journal(struct jfs_log *log, int wait)
{ int i; struct tblock *target = NULL;
/* jfs_write_inode may call us during read-only mount */ if (!log) return;
if (!list_empty(&log->cqueue)) { /* * This ensures that we will keep writing to the journal as long * as there are unwritten commit records
*/
target = list_entry(log->cqueue.prev, struct tblock, cqueue);
if (test_bit(log_FLUSH, &log->flag)) { /* * We're already flushing. * if flush_tblk is NULL, we are flushing everything, * so leave it that way. Otherwise, update it to the * latest transaction
*/ if (log->flush_tblk)
log->flush_tblk = target;
} else { /* Only flush until latest transaction is committed */
log->flush_tblk = target;
set_bit(log_FLUSH, &log->flag);
/* * Initiate I/O on outstanding transactions
*/ if (!(log->cflag & logGC_PAGEOUT)) {
log->cflag |= logGC_PAGEOUT;
lmGCwrite(log, 0);
}
}
} if ((wait > 1) || test_bit(log_SYNCBARRIER, &log->flag)) { /* Flush until all activity complete */
set_bit(log_FLUSH, &log->flag);
log->flush_tblk = NULL;
}
/* * If there was recent activity, we may need to wait * for the lazycommit thread to catch up
*/ if ((!list_empty(&log->cqueue)) || !list_empty(&log->synclist)) { for (i = 0; i < 200; i++) { /* Too much? */
msleep(250);
write_special_inodes(log, filemap_fdatawrite); if (list_empty(&log->cqueue) &&
list_empty(&log->synclist)) break;
}
}
assert(list_empty(&log->cqueue));
#ifdef CONFIG_JFS_DEBUG if (!list_empty(&log->synclist)) { struct logsyncblk *lp;
/* * NAME: lmLogShutdown() * * FUNCTION: log shutdown at last LogClose(). * * write log syncpt record. * update super block to set redone flag to 0. * * PARAMETER: log - log inode * * RETURN: 0 - success * * serialization: single last close thread
*/ int lmLogShutdown(struct jfs_log * log)
{ int rc; struct lrd lrd; int lsn; struct logsuper *logsuper; struct lbuf *bpsuper; struct lbuf *bp; struct logpage *lp;
jfs_info("lmLogShutdown: log:0x%p", log);
jfs_flush_journal(log, 2);
/* * write the last SYNCPT record with syncpoint = 0 * (i.e., log redo up to HERE !)
*/
lrd.logtid = 0;
lrd.backchain = 0;
lrd.type = cpu_to_le16(LOG_SYNCPT);
lrd.length = 0;
lrd.log.syncpt.sync = 0;
/* * synchronous update log superblock * mark log state as shutdown cleanly * (i.e., Log does not need to be replayed).
*/ if ((rc = lbmRead(log, 1, &bpsuper))) goto out;
out: /* * shutdown per log i/o
*/
lbmLogShutdown(log);
if (rc) {
jfs_warn("lmLogShutdown: exit(%d)", rc);
} return rc;
}
/*
 * NAME:	lmLogFileSystem()
 *
 * FUNCTION:	insert (<activate> = true)/remove (<activate> = false)
 *	file system into/from log active file system list.
 *
 *	The list is the active[] array in the log superblock (log page 1),
 *	keyed by file system uuid.  On activation the chosen slot index is
 *	also stored in sbi->aggregate.
 *
 * PARAMETER:	log	 - log whose superblock is updated
 *		sbi	 - file system being added/removed; supplies the uuid
 *		activate - non-zero: insert into, zero: remove from the list
 *
 * RETURN:	0	- success
 *		-EMFILE	- activation: no free slot among MAX_ACTIVE entries
 *		-EIO	- deactivation: uuid not found in the active list
 *		errors returned by lbmRead() or lbmIOWait()
 */
static int lmLogFileSystem(struct jfs_log * log, struct jfs_sb_info *sbi,
			   int activate)
{
	int rc = 0;
	int i;
	struct logsuper *logsuper;
	struct lbuf *bpsuper;
	uuid_t *uuid = &sbi->uuid;

	/*
	 * insert/remove file system device to log active file system list.
	 */
	rc = lbmRead(log, 1, &bpsuper);
	if (rc)
		return rc;

	logsuper = (struct logsuper *) bpsuper->l_ldata;
	if (activate) {
		/* claim the first unused (null uuid) slot */
		for (i = 0; i < MAX_ACTIVE; i++) {
			if (uuid_is_null(&logsuper->active[i].uuid)) {
				uuid_copy(&logsuper->active[i].uuid, uuid);
				sbi->aggregate = i;
				break;
			}
		}
		if (i == MAX_ACTIVE) {
			jfs_warn("Too many file systems sharing journal!");
			lbmFree(bpsuper);
			return -EMFILE;	/* Is there a better rc? */
		}
	} else {
		/* clear the slot holding this file system's uuid */
		for (i = 0; i < MAX_ACTIVE; i++) {
			if (uuid_equal(&logsuper->active[i].uuid, uuid)) {
				uuid_copy(&logsuper->active[i].uuid,
					  &uuid_null);
				break;
			}
		}
		if (i == MAX_ACTIVE) {
			jfs_warn("Somebody stomped on the journal!");
			lbmFree(bpsuper);
			return -EIO;
		}
	}

	/*
	 * synchronous write log superblock:
	 *
	 * write sidestream bypassing write queue:
	 * at file system mount, log super block is updated for
	 * activation of the file system before any log record
	 * (MOUNT record) of the file system, and at file system
	 * unmount, all meta data for the file system has been
	 * flushed before log super block is updated for deactivation
	 * of the file system.
	 */
	lbmDirectWrite(log, bpsuper, lbmWRITE | lbmRELEASE | lbmSYNC);
	rc = lbmIOWait(bpsuper, lbmFREE);

	return rc;
}
/*
 *		log buffer manager (lbm)
 *		------------------------
 *
 * special purpose buffer manager supporting log i/o requirements.
 *
 * per log write queue:
 * log pageout occurs in serial order by fifo write queue and
 * restricting to a single i/o in progress at any one time.
 * a circular singly-linked list
 * (log->wqueue points to the tail, and buffers are linked via
 * bp->l_wqnext field), and
 * maintains log page in pageout or waiting for pageout in serial pageout.
 */
/* * lbmLogInit() * * initialize per log I/O setup at lmLogInit()
*/ staticint lbmLogInit(struct jfs_log * log)
{ /* log inode */ int i; struct lbuf *lbuf;
jfs_info("lbmLogInit: log:0x%p", log);
/* initialize current buffer cursor */
log->bp = NULL;
/* * Each log has its own buffer pages allocated to it. These are * not managed by the page cache. This ensures that a transaction * writing to the log does not block trying to allocate a page from * the page cache (for the log). This would be bad, since page * allocation waits on the kswapd thread that may be committing inodes * which would cause log activity. Was that clear? I'm trying to * avoid deadlock here.
*/
init_waitqueue_head(&log->free_wait);
log->lbuf_free = NULL;
for (i = 0; i < LOGPAGES;) { char *buffer;
uint offset; struct page *page = alloc_page(GFP_KERNEL | __GFP_ZERO);
if (!page) goto error;
buffer = page_address(page); for (offset = 0; offset < PAGE_SIZE; offset += LOGPSIZE) {
lbuf = kmalloc(sizeof(struct lbuf), GFP_KERNEL); if (lbuf == NULL) { if (offset == 0)
__free_page(page); goto error;
} if (offset) /* we already have one reference */
get_page(page);
lbuf->l_offset = offset;
lbuf->l_ldata = buffer + offset;
lbuf->l_page = page;
lbuf->l_log = log;
init_waitqueue_head(&lbuf->l_ioevent);
bio->bi_end_io = lbmIODone;
bio->bi_private = bp; /*check if journaling to disk has been disabled*/ if (log->no_integrity) {
bio->bi_iter.bi_size = 0;
lbmIODone(bio);
} else {
submit_bio(bio);
}
/* * lbmWrite() * * buffer at head of pageout queue stays after completion of * partial-page pageout and redriven by explicit initiation of * pageout by caller until full-page pageout is completed and * released. * * device driver i/o done redrives pageout of new buffer at * head of pageout queue when current buffer at head of pageout * queue is released at the completion of its full-page pageout. * * LOGGC_LOCK() serializes lbmWrite() by lmNextPage() and lmGroupCommit(). * LCACHE_LOCK() serializes xflag between lbmWrite() and lbmIODone()
*/ staticvoid lbmWrite(struct jfs_log * log, struct lbuf * bp, int flag, int cant_block)
{ struct lbuf *tail; unsignedlong flags;
/* * insert bp at tail of write queue associated with log * * (request is either for bp already/currently at head of queue * or new bp to be inserted at tail)
*/
tail = log->wqueue;
/* is buffer not already on write queue ? */ if (bp->l_wqnext == NULL) { /* insert at tail of wqueue */ if (tail == NULL) {
log->wqueue = bp;
bp->l_wqnext = bp;
} else {
log->wqueue = bp;
bp->l_wqnext = tail->l_wqnext;
tail->l_wqnext = bp;
}
tail = bp;
}
/* is buffer at head of wqueue and for write ? */ if ((bp != tail->l_wqnext) || !(flag & lbmWRITE)) {
LCACHE_UNLOCK(flags); /* unlock+enable */ return;
}
/* check if journaling to disk has been disabled */ if (log->no_integrity) {
bio->bi_iter.bi_size = 0;
lbmIODone(bio);
} else {
submit_bio(bio);
INCREMENT(lmStat.submitted);
}
}
/* * lbmIOWait()
*/ staticint lbmIOWait(struct lbuf * bp, int flag)
{ unsignedlong flags; int rc = 0;
/* * pageout completion * * the bp at the head of write queue has completed pageout. * * if single-commit/full-page pageout, remove the current buffer * from head of pageout queue, and redrive pageout with * the new buffer at head of pageout queue; * otherwise, the partial-page pageout buffer stays at * the head of pageout queue to be redriven for pageout * by lmGroupCommit() until full-page pageout is completed.
*/
bp->l_flag &= ~lbmWRITE;
INCREMENT(lmStat.pagedone);
if (bp->l_flag & lbmDIRECT) {
LCACHE_WAKEUP(&bp->l_ioevent);
LCACHE_UNLOCK(flags); return;
}
tail = log->wqueue;
/* single element queue */ if (bp == tail) { /* remove head buffer of full-page pageout * from log device write queue
*/ if (bp->l_flag & lbmRELEASE) {
log->wqueue = NULL;
bp->l_wqnext = NULL;
}
} /* multi element queue */ else { /* remove head buffer of full-page pageout * from log device write queue
*/ if (bp->l_flag & lbmRELEASE) {
nextbp = tail->l_wqnext = bp->l_wqnext;
bp->l_wqnext = NULL;
/* * redrive pageout of next page at head of write queue: * redrive next page without any bound tblk * (i.e., page w/o any COMMIT records), or * first page of new group commit which has been * queued after current page (subsequent pageout * is performed synchronously, except page without * any COMMITs) by lmGroupCommit() as indicated * by lbmWRITE flag;
*/ if (nextbp->l_flag & lbmWRITE) { /* * We can't do the I/O at interrupt time. * The jfsIO thread can do it
*/
lbmRedrive(nextbp);
}
}
}
/* * synchronous pageout: * * buffer has not necessarily been removed from write queue * (e.g., synchronous write of partial-page with COMMIT): * leave buffer for i/o initiator to dispose
*/ if (bp->l_flag & lbmSYNC) {
LCACHE_UNLOCK(flags); /* unlock+enable */
/* * asynchronous pageout: * * buffer must have been removed from write queue: * insert buffer at head of freelist where it can be recycled
*/ else {
assert(bp->l_flag & lbmRELEASE);
assert(bp->l_flag & lbmFREE);
lbmfree(bp);
LCACHE_UNLOCK(flags); /* unlock+enable */
}
}
/*
 * jfsIOWait()
 *
 * Thread loop that services log_redrive_list: each queued lbuf is
 * unlinked under log_redrive_lock and handed to lbmStartIO() with the
 * lock dropped.  Once the list is empty the thread either freezes (when
 * freezing(current)) or sleeps interruptibly, and it exits when
 * kthread_should_stop() reports true.
 *
 * @arg is not used.  Always returns 0.
 */
int jfsIOWait(void *arg)
{
	struct lbuf *next;

	for (;;) {
		spin_lock_irq(&log_redrive_lock);

		/* pop entries one at a time; the lock is not held during I/O */
		for (next = log_redrive_list; next != NULL;
		     next = log_redrive_list) {
			log_redrive_list = next->l_redrive_next;
			next->l_redrive_next = NULL;
			spin_unlock_irq(&log_redrive_lock);
			lbmStartIO(next);
			spin_lock_irq(&log_redrive_lock);
		}

		if (!freezing(current)) {
			/*
			 * set the task state before dropping the lock, then
			 * sleep until woken
			 */
			set_current_state(TASK_INTERRUPTIBLE);
			spin_unlock_irq(&log_redrive_lock);
			schedule();
		} else {
			spin_unlock_irq(&log_redrive_lock);
			try_to_freeze();
		}

		if (kthread_should_stop())
			break;
	}

	jfs_info("jfsIOWait being killed!");
	return 0;
}
/* * NAME: lmLogFormat()/jfs_logform() * * FUNCTION: format file system log * * PARAMETERS: * log - volume log * logAddress - start address of log space in FS block * logSize - length of log space in FS block; * * RETURN: 0 - success * -EIO - i/o error * * XXX: We're synchronously writing one page at a time. This needs to * be improved by writing multiple pages at once.
*/ int lmLogFormat(struct jfs_log *log, s64 logAddress, int logSize)
{ int rc = -EIO; struct jfs_sb_info *sbi; struct logsuper *logsuper; struct logpage *lp; int lspn; /* log sequence page number */ struct lrd *lrd_ptr; int npages = 0; struct lbuf *bp;
/* * init pages 2 to npages-1 as log data pages: * * log page sequence number (lpsn) initialization: * * pn: 0 1 2 3 n-1 * +-----+-----+=====+=====+===.....===+=====+ * lspn: N-1 0 1 N-2 * <--- N page circular file ----> * * the N (= npages-2) data pages of the log is maintained as * a circular file for the log records; * lpsn grows by 1 monotonically as each log page is written * to the circular file of the log; * and setLogpage() will not reset the page number even if * the eor is equal to LOGPHDRSIZE. In order for binary search * still work in find log end process, we have to simulate the * log wrap situation at the log format time. * The 1st log page written will have the highest lpsn. Then * the succeeding log pages will have ascending order of * the lspn starting from 0, ... (N-2)
*/
lp = (struct logpage *) bp->l_ldata; /* * initialize 1st log page to be written: lpsn = N - 1, * write a SYNCPT log record is written to this page
*/
lp->h.page = lp->t.page = cpu_to_le32(npages - 3);
lp->h.eor = lp->t.eor = cpu_to_le16(LOGPHDRSIZE + LOGRDSIZE);
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereit gestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.