// SPDX-License-Identifier: GPL-2.0
/*
 *  linux/kernel/acct.c
 *
 *  BSD Process Accounting for Linux
 *
 *  Author: Marco van Wieringen <mvw@planets.elm.net>
 *
 *  Some code based on ideas and code from:
 *  Thomas K. Dyas <tdyas@eden.rutgers.edu>
 *
 *  This file implements BSD-style process accounting. Whenever any
 *  process exits, an accounting record of type "struct acct" is
 *  written to the file specified with the acct() system call. It is
 *  up to user-level programs to do useful things with the accounting
 *  log. The kernel just provides the raw accounting information.
 *
 *  (C) Copyright 1995 - 1997 Marco van Wieringen - ELM Consultancy B.V.
 *
 *  Plugged two leaks. 1) It didn't return acct_file into the free_filps if
 *  the file happened to be read-only. 2) If the accounting was suspended
 *  due to the lack of space it happily allowed to reopen it and completely
 *  lost the old acct_file. 3/10/98, Al Viro.
 *
 *  Now we silently close acct_file on attempt to reopen. Cleaned sys_acct().
 *  XTerms and EMACS are manifestations of pure evil. 21/10/98, AV.
 *
 *  Fixed a nasty interaction with sys_umount(). If the accounting
 *  was suspended we failed to stop it on umount(). Messy.
 *  Another one: remount to readonly didn't stop accounting.
 *  Question: what should we do if we have CAP_SYS_ADMIN but not
 *  CAP_SYS_PACCT? Current code does the following: umount returns -EBUSY
 *  unless we are messing with the root. In that case we are getting a
 *  real mess with do_remount_sb(). 9/11/98, AV.
 *
 *  Fixed a bunch of races (and pair of leaks). Probably not the best way,
 *  but this one obviously doesn't introduce deadlocks. Later. BTW, found
 *  one race (and leak) in BSD implementation.
 *  OK, that's better. ANOTHER race and leak in BSD variant. There always
 *  is one more bug... 10/11/98, AV.
 *
 *  Oh, fsck... Oopsable SMP race in do_process_acct() - we must hold
 *  ->mmap_lock to walk the vma list of current->mm. Nasty, since it leaks
 *  a struct file opened for write. Fixed. 2/6/2000, AV.
 */
/*
 * These constants control the amount of free space that suspends and
 * resumes the process accounting system, and the time delay between
 * each check.
 * Turned into sysctl-controllable parameters. AV, 12/11/98
 */
/*
 * Tunables backing the RESUME, SUSPEND and ACCT_TIMEOUT accessors below;
 * each macro simply names one slot of this three-element array.
 */
static int acct_parm[3] = {4, 2, 30};
#define RESUME		(acct_parm[0])	/* >foo% free space - resume */
#define SUSPEND		(acct_parm[1])	/* <foo% free space - suspend */
#define ACCT_TIMEOUT	(acct_parm[2])	/* foo second timeout between checks */
/* * Check the amount of free space and suspend/resume accordingly.
*/ staticbool check_free_space(struct bsd_acct_struct *acct)
{ struct kstatfs sbuf;
if (!acct->check_space) return acct->active;
/* May block */ if (vfs_statfs(&acct->file->f_path, &sbuf)) return acct->active;
/*
 * acct_pin_kill - tear down the accounting state attached to an fs_pin.
 * @pin: the pin being killed; mapped to its bsd_acct_struct via to_acct().
 *
 * Runs with acct->lock held from the fill_ac() call until the namespace
 * pointer has been cleared. The final write and close are handed to the
 * work item in acct->work, and this function blocks on acct->done until
 * that work has signalled completion.
 */
static void acct_pin_kill(struct fs_pin *pin)
{
	struct bsd_acct_struct *acct = to_acct(pin);

	mutex_lock(&acct->lock);
	/*
	 * Fill the accounting struct with the exiting task's info
	 * before punting to the workqueue.
	 */
	fill_ac(acct);
	schedule_work(&acct->work);
	wait_for_completion(&acct->done);
	/* Reset ns->bacct to NULL only if it still refers to this pin. */
	cmpxchg(&acct->ns->bacct, pin, NULL);
	mutex_unlock(&acct->lock);
	pin_remove(pin);
	acct_put(acct);
}
/* We were fired by acct_pin_kill() which holds acct->lock. */
acct_write_process(acct); if (file->f_op->flush)
file->f_op->flush(file, NULL);
__fput_sync(file);
complete(&acct->done);
}
/** * sys_acct - enable/disable process accounting * @name: file name for accounting records or NULL to shutdown accounting * * sys_acct() is the only system call needed to implement process * accounting. It takes the name of the file where accounting records * should be written. If the filename is NULL, accounting will be * shutdown. * * Returns: 0 for success or negative errno values for failure.
*/
SYSCALL_DEFINE1(acct, constchar __user *, name)
{ int error = 0;
/* * encode an u64 into a comp_t * * This routine has been adopted from the encode_comp_t() function in * the kern_acct.c file of the FreeBSD operating system. The encoding * is a 13-bit fraction with a 3-bit (base 8) exponent.
*/
/* Layout constants for the comp_t encoding: one directive per line. */
#define MANTSIZE	13			/* 13 bit mantissa. */
#define EXPSIZE		3			/* Base 8 (3 bit) exponent. */
#define MAXFRACT	((1 << MANTSIZE) - 1)	/* Maximum fractional value. */
/*
 * encode_comp_t - pack a u64 into the comp_t pseudo floating point format.
 * @value: quantity to encode.
 *
 * The result holds the exponent above a MANTSIZE-bit mantissa; every
 * exponent step stands for an EXPSIZE-bit right shift of the input.
 * The last bit shifted out decides whether the mantissa is rounded up.
 * Inputs whose exponent does not fit are clamped to the all-ones comp_t.
 */
static comp_t encode_comp_t(u64 value)
{
	const int max_exp = ((comp_t) ~0U) >> MANTSIZE;
	int exponent = 0;
	int round_up = 0;

	/* Shed EXPSIZE bits per step until the value fits the mantissa. */
	for (; value > MAXFRACT; exponent++) {
		/* Remember the top bit of the group about to be dropped. */
		round_up = value & (1 << (EXPSIZE - 1));
		value >>= EXPSIZE;
	}

	/* Rounding may push the mantissa past its limit; renormalise once. */
	if (round_up) {
		value++;
		if (value > MAXFRACT) {
			value >>= EXPSIZE;
			exponent++;
		}
	}

	/* Exponent too large for the field: saturate. */
	if (exponent > max_exp)
		return (comp_t) ~0U;

	/* Exponent goes above the mantissa bits. */
	return (exponent << MANTSIZE) + value;
}
#if ACCT_VERSION == 1 || ACCT_VERSION == 2
/*
 * encode an u64 into a comp2_t (24 bits)
 *
 * Format: 5 bit base 2 exponent, 20 bits mantissa.
 * The leading bit of the mantissa is not stored, but implied for
 * non-zero exponents.
 * Largest encodable value is 50 bits.
 */

#define MANTSIZE2	20			/* 20 bit mantissa. */
#define EXPSIZE2	5			/* 5 bit base 2 exponent. */
#define MAXFRACT2	((1ul << MANTSIZE2) - 1) /* Maximum fractional value. */
#define MAXEXP2		((1 << EXPSIZE2) - 1)	/* Maximum exponent. */

/*
 * encode_comp2_t - pack a u64 into the comp2_t format described above.
 * @value: quantity to encode.
 *
 * Returns the encoded value, or the largest representable encoding when
 * the exponent would exceed MAXEXP2.
 */
static comp2_t encode_comp2_t(u64 value)
{
	int exp, rnd;

	/* Exponent starts at 1 as soon as the implied leading bit is set. */
	exp = (value > (MAXFRACT2>>1));
	rnd = 0;
	while (value > MAXFRACT2) {
		rnd = value & 1;	/* Last bit shifted out decides rounding. */
		value >>= 1;
		exp++;
	}

	/*
	 * If we need to round up, do it (and handle overflow correctly).
	 */
	if (rnd && (++value > MAXFRACT2)) {
		value >>= 1;
		exp++;
	}

	if (exp > MAXEXP2) {
		/* Overflow. Return largest representable number instead. */
		return (1ul << (MANTSIZE2+EXPSIZE2-1)) - 1;
	} else {
		/* Drop the implied leading bit and place the exponent above it. */
		return (value & (MAXFRACT2>>1)) | (exp << (MANTSIZE2-1));
	}
}
#elif ACCT_VERSION == 3
/*
 * encode an u64 into a 32 bit IEEE float
 *
 * @value: quantity to encode; zero encodes as zero.
 *
 * The value is shifted left until its top bit is set, the exponent being
 * decremented once per shift. The 23 bits below the top bit become the
 * mantissa (lower bits are truncated, not rounded).
 */
static u32 encode_float(u64 value)
{
	unsigned exp = 190;	/* 127 + 63: exponent when bit 63 is already set */
	unsigned u;

	if (value == 0)
		return 0;
	while ((s64)value > 0) {
		value <<= 1;
		exp--;
	}
	u = (u32)(value >> 40) & 0x7fffffu;
	return u | (exp << 23);
}
#endif
/* * Write an accounting entry for an exiting process * * The acct_process() call is the workhorse of the process * accounting system. The struct acct is built here and then written * into the accounting file. This function should only be called from * do_exit() or when switching to a different output file.
*/
/* Perform file operations on behalf of whoever enabled accounting */
cred = override_creds(file->f_cred);
/* * First check to see if there is enough free_space to continue * the process accounting system. Then get freeze protection. If * the fs is frozen, just skip the write as we could deadlock * the system otherwise.
*/ if (check_free_space(acct) && file_start_write_trylock(file)) { /* it's been opened O_APPEND, so position is irrelevant */
loff_t pos = 0;
__kernel_write(file, ac, sizeof(acct_t), &pos);
file_end_write(file);
}
/* Accounting records are not subject to resource limits. */
flim = rlimit(RLIMIT_FSIZE);
current->signal->rlim[RLIMIT_FSIZE].rlim_cur = RLIM_INFINITY;
fill_ac(acct);
acct_write_process(acct);
current->signal->rlim[RLIMIT_FSIZE].rlim_cur = flim;
}
/** * acct_collect - collect accounting information into pacct_struct * @exitcode: task exit code * @group_dead: not 0, if this thread is the last one in the process.
*/ void acct_collect(long exitcode, int group_dead)
{ struct pacct_struct *pacct = ¤t->signal->pacct;
u64 utime, stime; unsignedlong vsize = 0;
if (group_dead && current->mm) { struct mm_struct *mm = current->mm;
VMA_ITERATOR(vmi, mm, 0); struct vm_area_struct *vma;
/**
 * acct_process - handles process accounting for an exiting task
 *
 * Starts at the current task's active pid namespace and climbs through
 * the parents. The first namespace found with ->bacct set is passed to
 * slow_acct_process(); if none has it set, nothing is done.
 */
void acct_process(void)
{
	struct pid_namespace *pid_ns = task_active_pid_ns(current);

	/*
	 * No locking is needed for this walk: current is still alive and
	 * holds its namespace, which in turn holds its parent.
	 */
	while (pid_ns != NULL && !pid_ns->bacct)
		pid_ns = pid_ns->parent;

	if (unlikely(pid_ns))
		slow_acct_process(pid_ns);
}
/*
 * Measurement V0.5
 * -- Processing time: 0.25 seconds (preprocessed) --
 *
 * The information on this web page has been carefully compiled to the
 * best of our knowledge. However, no guarantee is given as to the
 * completeness, correctness, or quality of the information provided.
 *
 * Note: The coloured syntax display and the measurement are still
 * experimental.
 */