/*
 * Task state bitmask. NOTE! These bits are also
 * encoded in fs/proc/array.c: get_task_state().
 *
 * We have two separate sets of flags: task->__state
 * is about runnability, while task->exit_state are
 * about the task exiting. Confusing, but this way
 * modifying one set can't modify the other one by
 * mistake.
 */
/*
 * Special states are those that do not use the normal wait-loop pattern. See
 * the comment with set_special_state().
 */
#define is_special_task_state(state)					\
((state) & (__TASK_STOPPED | __TASK_TRACED | TASK_PARKED | \
TASK_DEAD | TASK_FROZEN))
#ifdef CONFIG_DEBUG_ATOMIC_SLEEP
# define debug_normal_state_change(state_value)				\
	do {								\
WARN_ON_ONCE(is_special_task_state(state_value)); \
current->task_state_change = _THIS_IP_; \
} while (0)
# define debug_special_state_change(state_value)			\
	do {								\
WARN_ON_ONCE(!is_special_task_state(state_value)); \
current->task_state_change = _THIS_IP_; \
} while (0)
# define debug_rtlock_wait_set_state()					\
	do {								\
current->saved_state_change = current->task_state_change;\
current->task_state_change = _THIS_IP_; \
} while (0)
# define debug_rtlock_wait_restore_state()				\
	do {								\
current->task_state_change = current->saved_state_change;\
} while (0)
#else
# define debug_normal_state_change(cond)	do { } while (0)
# define debug_special_state_change(cond)	do { } while (0)
# define debug_rtlock_wait_set_state()		do { } while (0)
# define debug_rtlock_wait_restore_state()	do { } while (0)
#endif
#define trace_set_current_state(state_value)				\
	do {								\
		if (tracepoint_enabled(sched_set_state_tp))		\
__trace_set_current_state(state_value); \
} while (0)
/*
 * set_current_state() includes a barrier so that the write of current->__state
 * is correctly serialised wrt the caller's subsequent test of whether to
 * actually sleep:
 *
 *   for (;;) {
 *	set_current_state(TASK_UNINTERRUPTIBLE);
 *	if (CONDITION)
 *	   break;
 *
 *	schedule();
 *   }
 *   __set_current_state(TASK_RUNNING);
 *
 * If the caller does not need such serialisation (because, for instance, the
 * CONDITION test and condition change and wakeup are under the same lock) then
 * use __set_current_state().
 *
 * The above is typically ordered against the wakeup, which does:
 *
 *   CONDITION = 1;
 *   wake_up_state(p, TASK_UNINTERRUPTIBLE);
 *
 * where wake_up_state()/try_to_wake_up() executes a full memory barrier before
 * accessing p->__state.
 *
 * Wakeup will do: if (@state & p->__state) p->__state = TASK_RUNNING, that is,
 * once it observes the TASK_UNINTERRUPTIBLE store the waking CPU can issue a
 * TASK_RUNNING store which can collide with __set_current_state(TASK_RUNNING).
 *
 * However, with slightly different timing the wakeup TASK_RUNNING store can
 * also collide with the TASK_UNINTERRUPTIBLE store. Losing that store is not
 * a problem either because that will result in one extra go around the loop
 * and our @cond test will save the day.
 *
 * Also see the comments of try_to_wake_up().
 */
#define __set_current_state(state_value)				\
	do {								\
debug_normal_state_change((state_value)); \
trace_set_current_state(state_value); \
WRITE_ONCE(current->__state, (state_value)); \
} while (0)
#define set_current_state(state_value)					\
	do {								\
debug_normal_state_change((state_value)); \
trace_set_current_state(state_value); \
smp_store_mb(current->__state, (state_value)); \
} while (0)
/*
 * set_special_state() should be used for those states when the blocking task
 * can not use the regular condition based wait-loop. In that case we must
 * serialize against wakeups such that any possible in-flight TASK_RUNNING
 * stores will not collide with our state change.
 */
#define set_special_state(state_value)					\
	do {								\
		unsigned long flags; /* may shadow */			\
\
		raw_spin_lock_irqsave(&current->pi_lock, flags);	\
debug_special_state_change((state_value)); \
trace_set_current_state(state_value); \
WRITE_ONCE(current->__state, (state_value)); \
		raw_spin_unlock_irqrestore(&current->pi_lock, flags);	\
} while (0)
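
/*
 * Editorial sketch (not part of the original header): a typical
 * set_special_state() user is a park/stop style loop that must not lose a
 * concurrent TASK_RUNNING store, roughly along the lines of
 * __kthread_parkme():
 *
 *	for (;;) {
 *		set_special_state(TASK_PARKED);
 *		if (!test_bit(KTHREAD_SHOULD_PARK, &self->flags))
 *			break;
 *		schedule();
 *	}
 *	__set_current_state(TASK_RUNNING);
 */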
/*
 * PREEMPT_RT specific variants for "sleeping" spin/rwlocks
 *
 * RT's spin/rwlock substitutions are state preserving. The state of the
 * task when blocking on the lock is saved in task_struct::saved_state and
 * restored after the lock has been acquired. These operations are
 * serialized by task_struct::pi_lock against try_to_wake_up(). Any non RT
 * lock related wakeups while the task is blocked on the lock are
 * redirected to operate on task_struct::saved_state to ensure that these
 * are not dropped. On restore task_struct::saved_state is set to
 * TASK_RUNNING so any wakeup attempt redirected to saved_state will fail.
 *
 * The lock operation looks like this:
 *
 *	current_save_and_set_rtlock_wait_state();
 *	for (;;) {
 *		if (try_lock())
 *			break;
 *		raw_spin_unlock_irq(&lock->wait_lock);
 *		schedule_rtlock();
 *		raw_spin_lock_irq(&lock->wait_lock);
 *		set_current_state(TASK_RTLOCK_WAIT);
 *	}
 *	current_restore_rtlock_saved_state();
 */
#define current_save_and_set_rtlock_wait_state()			\
	do {								\
lockdep_assert_irqs_disabled(); \
		raw_spin_lock(&current->pi_lock);			\
current->saved_state = current->__state; \
debug_rtlock_wait_set_state(); \
trace_set_current_state(TASK_RTLOCK_WAIT); \
WRITE_ONCE(current->__state, TASK_RTLOCK_WAIT); \
		raw_spin_unlock(&current->pi_lock);			\
} while (0);
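
/*
 * Editorial sketch: the restore counterpart referenced above,
 * current_restore_rtlock_saved_state(), conceptually undoes the save under
 * the same pi_lock serialization (the actual definition may differ):
 *
 *	raw_spin_lock(&current->pi_lock);
 *	debug_rtlock_wait_restore_state();
 *	WRITE_ONCE(current->__state, current->saved_state);
 *	current->saved_state = TASK_RUNNING;
 *	raw_spin_unlock(&current->pi_lock);
 */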
/* wrapper functions to trace from this header file */
DECLARE_TRACEPOINT(sched_set_state_tp);
extern void __trace_set_current_state(int state_value);
DECLARE_TRACEPOINT(sched_set_need_resched_tp);
extern void __trace_set_need_resched(struct task_struct *curr, int tif);
/**
 * struct prev_cputime - snapshot of system and user cputime
 * @utime:	time spent in user mode
 * @stime:	time spent in system mode
 * @lock:	protects the above two fields
 *
 * Stores previous user/system time values such that we can guarantee
 * monotonicity.
 */
struct prev_cputime {
#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
u64 utime;
u64 stime;
	raw_spinlock_t lock;
#endif
};
enum vtime_state {
	/* Task is sleeping or running in a CPU with VTIME inactive: */
	VTIME_INACTIVE = 0,
	/* Task is idle */
	VTIME_IDLE,
	/* Task runs in kernelspace in a CPU with VTIME active: */
	VTIME_SYS,
	/* Task runs in userspace in a CPU with VTIME active: */
	VTIME_USER,
	/* Task runs as a guest in a CPU with VTIME active: */
	VTIME_GUEST,
};
	/* # of times we have run on this CPU: */
	unsigned long			pcount;

	/* Time spent waiting on a runqueue: */
	unsigned long long		run_delay;

	/* Max time spent waiting on a runqueue: */
	unsigned long long		max_run_delay;

	/* Min time spent waiting on a runqueue: */
	unsigned long long		min_run_delay;

	/* Timestamps: */

	/* When did we last run on a CPU? */
	unsigned long long		last_arrival;

	/* When were we last queued to run? */
	unsigned long long		last_queued;
#endif /* CONFIG_SCHED_INFO */
};
/*
 * Integer metrics need fixed point arithmetic, e.g., sched/fair
 * has a few: load, load_avg, util_avg, freq, and capacity.
 *
 * We define a basic fixed point arithmetic range, and then formalize
 * all these metrics based on that basic range.
 */
# define SCHED_FIXEDPOINT_SHIFT		10
# define SCHED_FIXEDPOINT_SCALE		(1L << SCHED_FIXEDPOINT_SHIFT)
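
/*
 * Editorial example (not part of the original header): with
 * SCHED_FIXEDPOINT_SHIFT == 10, ratios are stored in 1/1024ths, so a
 * utilization of 50% is represented as 0.5 * 1024 = 512.  A product of two
 * fixed-point quantities has to be shifted back down by the same amount:
 *
 *	scaled = (util * capacity) >> SCHED_FIXEDPOINT_SHIFT;
 */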
/*
 * The load/runnable/util_avg accumulates an infinite geometric series
 * (see __update_load_avg_cfs_rq() in kernel/sched/pelt.c).
 *
 * [load_avg definition]
 *
 *   load_avg = runnable% * scale_load_down(load)
 *
 * [runnable_avg definition]
 *
 *   runnable_avg = runnable% * SCHED_CAPACITY_SCALE
 *
 * [util_avg definition]
 *
 *   util_avg = running% * SCHED_CAPACITY_SCALE
 *
 * where runnable% is the time ratio that a sched_entity is runnable and
 * running% the time ratio that a sched_entity is running.
 *
 * For cfs_rq, they are the aggregated values of all runnable and blocked
 * sched_entities.
 *
 * The load/runnable/util_avg doesn't directly factor frequency scaling and CPU
 * capacity scaling. The scaling is done through the rq_clock_pelt that is used
 * for computing those signals (see update_rq_clock_pelt())
 *
 * N.B., the above ratios (runnable% and running%) themselves are in the
 * range of [0, 1]. To do fixed point arithmetics, we therefore scale them
 * to as large a range as necessary. This is for example reflected by
 * util_avg's SCHED_CAPACITY_SCALE.
 *
 * [Overflow issue]
 *
 * The 64-bit load_sum can have 4353082796 (=2^64/47742/88761) entities
 * with the highest load (=88761), always runnable on a single cfs_rq,
 * and should not overflow as the number already hits PID_MAX_LIMIT.
 *
 * For all other cases (including 32-bit kernels), struct load_weight's
 * weight will overflow first before we do, because:
 *
 *	Max(load_avg) <= Max(load.weight)
 *
 * Then it is the load_weight's responsibility to consider overflow
 * issues.
 */
struct sched_avg {
u64 last_update_time;
u64 load_sum;
u64 runnable_sum;
u32 util_sum;
	u32				period_contrib;
	unsigned long			load_avg;
	unsigned long			runnable_avg;
	unsigned long			util_avg;
	unsigned int			util_est;
} ____cacheline_aligned;
/*
 * The UTIL_AVG_UNCHANGED flag is used to synchronize util_est with util_avg
 * updates. When a task is dequeued, its util_est should not be updated if its
 * util_avg has not been updated in the meantime.
 * This information is mapped into the MSB bit of util_est at dequeue time.
 * Since max value of util_est for a task is 1024 (PELT util_avg for a task)
 * it is safe to use MSB.
 */
#define UTIL_EST_WEIGHT_SHIFT		2
#define UTIL_AVG_UNCHANGED		0x80000000
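
/*
 * Editorial sketch (assumptions noted): since UTIL_AVG_UNCHANGED occupies the
 * MSB of util_est, readers of the estimate mask the flag off and the flag is
 * tested separately, roughly:
 *
 *	unsigned int est = READ_ONCE(p->se.avg.util_est);
 *	unsigned int val = est & ~UTIL_AVG_UNCHANGED;	// the estimate itself
 *	bool in_sync	 = est &  UTIL_AVG_UNCHANGED;	// unchanged since last update
 */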
u64 exec_start;
u64 sum_exec_runtime;
u64 prev_sum_exec_runtime;
	u64				vruntime;
	union {
		/*
		 * When !@on_rq this field is vlag.
		 * When cfs_rq->curr == se (which implies @on_rq)
		 * this field is vprot. See protect_slice().
		 */
s64 vlag;
u64 vprot;
};
u64 slice;
u64 nr_migrations;
#ifdef CONFIG_FAIR_GROUP_SCHED
	int				depth;
	struct sched_entity		*parent;
	/* rq on which this entity is (to be) queued: */
	struct cfs_rq			*cfs_rq;
	/* rq "owned" by this entity/group: */
	struct cfs_rq			*my_q;
	/* cached value of my_q->h_nr_running */
	unsigned long			runnable_weight;
#endif
	/*
	 * Per entity load average tracking.
	 *
	 * Put into separate cache line so it does not
	 * collide with read-mostly values above.
	 */
	struct sched_avg		avg;
};
	/*
	 * Original scheduling parameters. Copied here from sched_attr
	 * during sched_setattr(), they will remain the same until
	 * the next sched_setattr().
	 */
u64 dl_runtime; /* Maximum runtime for each instance */
u64 dl_deadline; /* Relative deadline of each instance */
u64 dl_period; /* Separation of two instances (period) */
u64 dl_bw; /* dl_runtime / dl_period */
u64 dl_density; /* dl_runtime / dl_deadline */
	/*
	 * Actual scheduling parameters. Initialized with the values above,
	 * they are continuously updated during task execution. Note that
	 * the remaining runtime could be < 0 in case we are in overrun.
	 */
s64 runtime; /* Remaining runtime for this instance */
	u64				deadline;	/* Absolute deadline for this instance	*/
	unsigned int			flags;		/* Specifying the scheduler behaviour	*/
	/*
	 * Some bool flags:
	 *
	 * @dl_throttled tells if we exhausted the runtime. If so, the
	 * task has to wait for a replenishment to be performed at the
	 * next firing of dl_timer.
	 *
	 * @dl_yielded tells if task gave up the CPU before consuming
	 * all its available runtime during the last job.
	 *
	 * @dl_non_contending tells if the task is inactive while still
	 * contributing to the active utilization. In other words, it
	 * indicates if the inactive timer has been armed and its handler
	 * has not been executed yet. This flag is useful to avoid race
	 * conditions between the inactive timer handler and the wakeup
	 * code.
	 *
	 * @dl_overrun tells if the task asked to be informed about runtime
	 * overruns.
	 *
	 * @dl_server tells if this is a server entity.
	 *
	 * @dl_defer tells if this is a deferred or regular server. For
	 * now only defer server exists.
	 *
	 * @dl_defer_armed tells if the deferrable server is waiting
	 * for the replenishment timer to activate it.
	 *
	 * @dl_server_active tells if the dlserver is active (started).
	 * dlserver is started on first cfs enqueue on an idle runqueue
	 * and is stopped when a dequeue results in 0 cfs tasks on the
	 * runqueue. In other words, dlserver is active only when the CPU's
	 * runqueue has at least one cfs task.
	 *
	 * @dl_defer_running tells if the deferrable server is actually
	 * running, skipping the defer phase.
	 */
	unsigned int			dl_throttled      : 1;
	unsigned int			dl_yielded        : 1;
	unsigned int			dl_non_contending : 1;
	unsigned int			dl_overrun	  : 1;
	unsigned int			dl_server         : 1;
	unsigned int			dl_server_active  : 1;
	unsigned int			dl_defer	  : 1;
	unsigned int			dl_defer_armed	  : 1;
	unsigned int			dl_defer_running  : 1;
	/*
	 * Bandwidth enforcement timer. Each -deadline task has its
	 * own bandwidth to be enforced, thus we need one timer per task.
	 */
	struct hrtimer			dl_timer;
	/*
	 * Inactive timer, responsible for decreasing the active utilization
	 * at the "0-lag time". When a -deadline task blocks, it contributes
	 * to GRUB's active utilization until the "0-lag time", hence a
	 * timer is needed to decrease the active utilization at the correct
	 * time.
	 */
	struct hrtimer			inactive_timer;
	/*
	 * Bits for DL-server functionality. Also see the comment near
	 * dl_server_update().
	 *
	 * @rq				the runqueue this server is for
	 *
	 * @server_has_tasks()		returns true if @server_pick returns a
	 *				runnable task.
	 */
	struct rq			*rq;
dl_server_pick_f server_pick_task;
#ifdef CONFIG_RT_MUTEXES
	/*
	 * Priority Inheritance. When a DEADLINE scheduling entity is boosted
	 * pi_se points to the donor, otherwise points to the dl_se it belongs
	 * to (the original one/itself).
	 */
	struct sched_dl_entity *pi_se;
#endif
};
#ifdef CONFIG_UCLAMP_TASK
/* Number of utilization clamp buckets (shorter alias) */
#define UCLAMP_BUCKETS CONFIG_UCLAMP_BUCKETS_COUNT
/*
 * Utilization clamp for a scheduling entity
 * @value:		clamp value "assigned" to a se
 * @bucket_id:		bucket index corresponding to the "assigned" value
 * @active:		the se is currently refcounted in a rq's bucket
 * @user_defined:	the requested clamp value comes from user-space
 *
 * The bucket_id is the index of the clamp bucket matching the clamp value
 * which is pre-computed and stored to avoid expensive integer divisions from
 * the fast path.
 *
 * The active bit is set whenever a task has got an "effective" value assigned,
 * which can be different from the clamp value "requested" from user-space.
 * This allows to know a task is refcounted in the rq's bucket corresponding
 * to the "effective" bucket_id.
 *
 * The user_defined bit is set whenever a task has got a task-specific clamp
 * value requested from userspace, i.e. the system defaults apply to this task
 * just as a restriction. This allows to relax default clamps when a less
 * restrictive task-specific value has been requested, thus allowing to
 * implement a "nice" semantic. For example, a task running with a 20%
 * default boost can still drop its own boosting to 0%.
 */
struct uclamp_se {
	unsigned int value		: bits_per(SCHED_CAPACITY_SCALE);
	unsigned int bucket_id		: bits_per(UCLAMP_BUCKETS);
	unsigned int active		: 1;
	unsigned int user_defined	: 1;
};
#endif /* CONFIG_UCLAMP_TASK */
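
/*
 * Editorial sketch (helper shown conceptually; the in-kernel implementation
 * is uclamp_bucket_id() and may differ in rounding): the cached bucket_id
 * simply maps the clamp value onto one of UCLAMP_BUCKETS equally sized
 * ranges of [0..SCHED_CAPACITY_SCALE]:
 *
 *	delta	  = SCHED_CAPACITY_SCALE / UCLAMP_BUCKETS;
 *	bucket_id = min(value / delta, UCLAMP_BUCKETS - 1);
 */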
union rcu_special {
	struct {
u8 blocked;
u8 need_qs;
u8 exp_hint; /* Hint for performance. */
u8 need_mb; /* Readers need smp_mb(). */
} b; /* Bits. */
u32 s; /* Set of bits. */
};
struct task_struct {
#ifdef CONFIG_THREAD_INFO_IN_TASK
	/*
	 * For reasons of header soup (see current_thread_info()), this
	 * must be the first element of task_struct.
	 */
	struct thread_info		thread_info;
#endif
	unsigned int			__state;
	/* saved state for "spinlock sleepers" */
	unsigned int			saved_state;
	/*
	 * This begins the randomizable portion of task_struct. Only
	 * scheduling-critical items should be added above here.
	 */
randomized_struct_fields_start
void *stack;
	refcount_t			usage;
	/* Per task flags (PF_*), defined further below: */
	unsigned int			flags;
	unsigned int			ptrace;
	/*
	 * recent_used_cpu is initially set as the last CPU used by a task
	 * that wakes affine another task. Waker/wakee relationships can
	 * push tasks around a CPU where each wakeup moves to the next one.
	 * Tracking a recently used CPU allows a quick search for a recently
	 * used CPU that may be idle.
	 */
	int				recent_used_cpu;
	int				wake_cpu;
	int				on_rq;
	int				prio;
	int				static_prio;
	int				normal_prio;
	unsigned int			rt_priority;
#ifdef CONFIG_UCLAMP_TASK
	/*
	 * Clamp values requested for a scheduling entity.
	 * Must be updated with task_rq_lock() held.
	 */
	struct uclamp_se		uclamp_req[UCLAMP_CNT];
	/*
	 * Effective clamp values used for a scheduling entity.
	 * Must be updated with task_rq_lock() held.
	 */
	struct uclamp_se		uclamp[UCLAMP_CNT];
#endif
struct sched_statistics stats;
#ifdef CONFIG_PREEMPT_NOTIFIERS
	/* List of struct preempt_notifier: */
	struct hlist_head		preempt_notifiers;
#endif
	int				exit_state;
	int				exit_code;
	int				exit_signal;
	/* The signal sent when the parent dies: */
	int				pdeath_signal;
	/* JOBCTL_*, siglock protected: */
	unsigned long			jobctl;
	/* Used for emulating ABI behavior of previous Linux versions: */
	unsigned int			personality;
	/* Force alignment to the next boundary: */
	unsigned			:0;
/* Unserialized, strictly 'current' */
	/*
	 * This field must not be in the scheduler word above due to wakelist
	 * queueing no longer being serialized by p->on_cpu. However:
	 *
	 * p->XXX = X;			ttwu()
	 * schedule()			  if (p->on_rq && ..) // false
	 *   smp_mb__after_spinlock();	  if (smp_load_acquire(&p->on_cpu) && //true
	 *   deactivate_task()		      ttwu_queue_wakelist())
	 *     p->on_rq = 0;			p->sched_remote_wakeup = Y;
	 *
	 * guarantees all stores of 'current' are visible before
	 * ->sched_remote_wakeup gets used, so it can be in this word.
	 */
	unsigned			sched_remote_wakeup:1;
#ifdef CONFIG_RT_MUTEXES
	unsigned			sched_rt_mutex:1;
#endif
	/* Bit to tell TOMOYO we're in execve(): */
	unsigned			in_execve:1;
	unsigned			in_iowait:1;
#ifndef TIF_RESTORE_SIGMASK
	unsigned			restore_sigmask:1;
#endif
#ifdef CONFIG_MEMCG_V1
	unsigned			in_user_fault:1;
#endif
#ifdef CONFIG_LRU_GEN
	/* whether the LRU algorithm may apply to this access */
	unsigned			in_lru_fault:1;
#endif
#ifdef CONFIG_COMPAT_BRK
	unsigned			brk_randomized:1;
#endif
#ifdef CONFIG_CGROUPS
	/* disallow userland-initiated cgroup migration */
	unsigned			no_cgroup_migration:1;
	/* task is frozen/stopped (used by the cgroup freezer) */
	unsigned			frozen:1;
#endif
#ifdef CONFIG_BLK_CGROUP
	unsigned			use_memdelay:1;
#endif
#ifdef CONFIG_PSI
	/* Stalled due to lack of memory */
	unsigned			in_memstall:1;
#endif
#ifdef CONFIG_PAGE_OWNER
	/* Used by page_owner=on to detect recursion in page tracking. */
	unsigned			in_page_owner:1;
#endif
#ifdef CONFIG_EVENTFD
	/* Recursion prevention for eventfd_signal() */
	unsigned			in_eventfd:1;
#endif
#ifdef CONFIG_ARCH_HAS_CPU_PASID
	unsigned			pasid_activated:1;
#endif
#ifdef CONFIG_X86_BUS_LOCK_DETECT
	unsigned			reported_split_lock:1;
#endif
#ifdef CONFIG_TASK_DELAY_ACCT
	/* delay due to memory thrashing */
	unsigned			in_thrashing:1;
#endif
	unsigned			in_nf_duplicate:1;
#ifdef CONFIG_PREEMPT_RT
	struct netdev_xmit		net_xmit;
#endif
	unsigned long			atomic_flags; /* Flags requiring atomic access. */
struct restart_block restart_block;
pid_t pid;
pid_t tgid;
#ifdef CONFIG_STACKPROTECTOR
	/* Canary value for the -fstack-protector GCC feature: */
	unsigned long			stack_canary;
#endif
	/*
	 * Pointers to the (original) parent process, youngest child, younger sibling,
	 * older sibling, respectively.  (p->father can be replaced with
	 * p->real_parent->pid)
	 */

	/* Real parent process: */
	struct task_struct __rcu	*real_parent;
	/*
	 * Children/sibling form the list of natural children:
	 */
	struct list_head		children;
	struct list_head		sibling;
	struct task_struct		*group_leader;
	/*
	 * 'ptraced' is the list of tasks this task is using ptrace() on.
	 *
	 * This includes both natural children and PTRACE_ATTACH targets.
	 * 'ptrace_entry' is this task's link on the p->parent->ptraced list.
	 */
	struct list_head		ptraced;
	struct list_head		ptrace_entry;
	/*
	 * executable name, excluding path.
	 *
	 * - normally initialized in begin_new_exec()
	 * - set it with set_task_comm()
	 *   - strscpy_pad() to ensure it is always NUL-terminated and
	 *     zero-padded
	 *   - task_lock() to ensure the operation is atomic and the name is
	 *     fully updated.
	 */
	char				comm[TASK_COMM_LEN];
/* Thread group tracking: */
u64 parent_exec_id;
u64 self_exec_id;
/* Protection against (de-)allocation: mm, files, fs, tty, keyrings, mems_allowed, mempolicy: */
spinlock_t alloc_lock;
/* Protection of the PI data structures: */
raw_spinlock_t pi_lock;
struct wake_q_node wake_q;
#ifdef CONFIG_RT_MUTEXES
	/* PI waiters blocked on a rt_mutex held by this task: */
	struct rb_root_cached		pi_waiters;
	/* Updated under owner's pi_lock and rq lock */
	struct task_struct		*pi_top_task;
	/* Deadlock detection and priority inheritance handling: */
	struct rt_mutex_waiter		*pi_blocked_on;
#endif
struct mutex *blocked_on; /* lock we're blocked on */
#ifdef CONFIG_DETECT_HUNG_TASK_BLOCKER
	/*
	 * Encoded lock address causing task block (lower 2 bits = type from
	 * <linux/hung_task.h>). Accessed via hung_task_*() helpers.
	 */
	unsigned long			blocker;
#endif
#ifdef CONFIG_DEBUG_ATOMIC_SLEEP
	int				non_block_count;
#endif
#ifdef CONFIG_TRACE_IRQFLAGS
	struct irqtrace_events		irqtrace;
	unsigned int			hardirq_threaded;
	u64				hardirq_chain_key;
	int				softirqs_enabled;
	int				softirq_context;
	int				irq_config;
#endif
#ifdef CONFIG_PREEMPT_RT
	int				softirq_disable_cnt;
#endif
	/*
	 * This pointer is only modified for current in syscall and
	 * pagefault context (and for tasks being destroyed), so it can be read
	 * from any of the following contexts:
	 *  - RCU read-side critical section
	 *  - current->numa_group from everywhere
	 *  - task's runqueue locked, task not running
	 */
	struct numa_group __rcu		*numa_group;
	/*
	 * numa_faults is an array split into four regions:
	 * faults_memory, faults_cpu, faults_memory_buffer, faults_cpu_buffer
	 * in this precise order.
	 *
	 * faults_memory: Exponential decaying average of faults on a per-node
	 * basis. Scheduling placement decisions are made based on these
	 * counts. The values remain static for the duration of a PTE scan.
	 * faults_cpu: Track the nodes the process was running on when a NUMA
	 * hinting fault was incurred.
	 * faults_memory_buffer and faults_cpu_buffer: Record faults per node
	 * during the current scan window. When the scan completes, the counts
	 * in faults_memory and faults_cpu decay and these values are copied.
	 */
	unsigned long			*numa_faults;
	unsigned long			total_numa_faults;
	/*
	 * numa_faults_locality tracks if faults recorded during the last
	 * scan window were remote/local or failed to migrate. The task scan
	 * period is adapted based on the locality of the faults with different
	 * weights depending on whether they were shared or private faults.
	 */
	unsigned long			numa_faults_locality[3];
#ifdef CONFIG_RSEQ
	struct rseq __user		*rseq;
	u32				rseq_len;
	u32				rseq_sig;
	/*
	 * RmW on rseq_event_mask must be performed atomically
	 * with respect to preemption.
	 */
	unsigned long			rseq_event_mask;
# ifdef CONFIG_DEBUG_RSEQ
	/*
	 * This is a place holder to save a copy of the rseq fields for
	 * validation of read-only fields. The struct rseq has a
	 * variable-length array at the end, so it cannot be used
	 * directly. Reserve a size large enough for the known fields.
	 */
	char				rseq_fields[sizeof(struct rseq)];
# endif
#endif
#ifdef CONFIG_SCHED_MM_CID
	int				mm_cid;		/* Current cid in mm */
	int				last_mm_cid;	/* Most recent cid in mm */
	int				migrate_from_cpu;
	int				mm_cid_active;	/* Whether cid bitmap is active */
	struct callback_head		cid_work;
#endif
struct tlbflush_unmap_batch tlb_ubc;
	/* Cache last used pipe for splice(): */
	struct pipe_inode_info		*splice_pipe;
#ifdef CONFIG_FAULT_INJECTION
	int				make_it_fail;
	unsigned int			fail_nth;
#endif
	/*
	 * When (nr_dirtied >= nr_dirtied_pause), it's time to call
	 * balance_dirty_pages() for a dirty throttling pause:
	 */
	int				nr_dirtied;
	int				nr_dirtied_pause;
	/* Start of a write-and-pause period: */
	unsigned long			dirty_paused_when;
#ifdef CONFIG_LATENCYTOP
	int				latency_record_count;
	struct latency_record		latency_record[LT_SAVECOUNT];
#endif
	/*
	 * Time slack values; these are used to round up poll() and
	 * select() etc timeout values. These are in nanoseconds.
	 */
u64 timer_slack_ns;
u64 default_timer_slack_ns;
#ifdef CONFIG_ARCH_HAS_PARANOID_L1D_FLUSH
	/*
	 * If L1D flush is supported on mm context switch
	 * then we use this callback head to queue kill work
	 * to kill tasks that are not running on SMT disabled
	 * cores
	 */
	struct callback_head		l1d_flush_kill;
#endif
#ifdef CONFIG_RV
	/*
	 * Per-task RV monitor, fixed in CONFIG_RV_PER_TASK_MONITORS.
	 * If memory becomes a concern, we can think about a dynamic method.
	 */
	union rv_task_monitor		rv[CONFIG_RV_PER_TASK_MONITORS];
#endif
	/* CPU-specific state of this task: */
	struct thread_struct		thread;
	/*
	 * New fields for task_struct should be added above here, so that
	 * they are included in the randomized portion of task_struct.
	 */
randomized_struct_fields_end
} __attribute__ ((aligned (64)));
if ((tsk_state & TASK_IDLE) == TASK_IDLE)
state = TASK_REPORT_IDLE;
	/*
	 * We're lying here, but rather than expose a completely new task state
	 * to userspace, we can make this appear as if the task has gone through
	 * a regular rt_mutex_lock() call.
	 *
	 * Report frozen tasks as uninterruptible.
	 */
	if ((tsk_state & TASK_RTLOCK_WAIT) || (tsk_state & TASK_FROZEN))
state = TASK_UNINTERRUPTIBLE;
/*
 * Per process flags
 */
#define PF_VCPU			0x00000001	/* I'm a virtual CPU */
#define PF_IDLE			0x00000002	/* I am an IDLE thread */
#define PF_EXITING		0x00000004	/* Getting shut down */
#define PF_POSTCOREDUMP		0x00000008	/* Coredumps should ignore this task */
#define PF_IO_WORKER		0x00000010	/* Task is an IO worker */
#define PF_WQ_WORKER		0x00000020	/* I'm a workqueue worker */
#define PF_FORKNOEXEC		0x00000040	/* Forked but didn't exec */
#define PF_MCE_PROCESS		0x00000080	/* Process policy on mce errors */
#define PF_SUPERPRIV		0x00000100	/* Used super-user privileges */
#define PF_DUMPCORE		0x00000200	/* Dumped core */
#define PF_SIGNALED		0x00000400	/* Killed by a signal */
#define PF_MEMALLOC		0x00000800	/* Allocating memory to free memory. See memalloc_noreclaim_save() */
#define PF_NPROC_EXCEEDED	0x00001000	/* set_user() noticed that RLIMIT_NPROC was exceeded */
#define PF_USED_MATH		0x00002000	/* If unset the fpu must be initialized before use */
#define PF_USER_WORKER		0x00004000	/* Kernel thread cloned from userspace thread */
#define PF_NOFREEZE		0x00008000	/* This thread should not be frozen */
#define PF_KCOMPACTD		0x00010000	/* I am kcompactd */
#define PF_KSWAPD		0x00020000	/* I am kswapd */
#define PF_MEMALLOC_NOFS	0x00040000	/* All allocations inherit GFP_NOFS. See memalloc_nofs_save() */
#define PF_MEMALLOC_NOIO	0x00080000	/* All allocations inherit GFP_NOIO. See memalloc_noio_save() */
#define PF_LOCAL_THROTTLE	0x00100000	/* Throttle writes only against the bdi I write to,
						 * I am cleaning dirty pages from some other bdi. */
#define PF_KTHREAD		0x00200000	/* I am a kernel thread */
#define PF_RANDOMIZE		0x00400000	/* Randomize virtual address space */
#define PF__HOLE__00800000	0x00800000
#define PF__HOLE__01000000	0x01000000
#define PF__HOLE__02000000	0x02000000
#define PF_NO_SETAFFINITY	0x04000000	/* Userland is not allowed to meddle with cpus_mask */
#define PF_MCE_EARLY		0x08000000	/* Early kill for mce process policy */
#define PF_MEMALLOC_PIN		0x10000000	/* Allocations constrained to zones which allow long term pinning.
						 * See memalloc_pin_save() */
#define PF_BLOCK_TS		0x20000000	/* plug has ts that needs updating */
#define PF__HOLE__40000000	0x40000000
#define PF_SUSPEND_TASK		0x80000000	/* This thread called freeze_processes() and should not be frozen */
/*
 * Only the _current_ task can read/write to tsk->flags, but other
 * tasks can access tsk->flags in readonly mode for example
 * with tsk_used_math (like during threaded core dumping).
 * There is however an exception to this rule during ptrace
 * or during fork: the ptracer task is allowed to write to the
 * child->flags of its traced child (same goes for fork, the parent
 * can write to the child->flags), because we're guaranteed the
 * child is not running and in turn not changing child->flags
 * at the same time the parent does it.
 */
#define clear_stopped_child_used_math(child)	do { (child)->flags &= ~PF_USED_MATH; } while (0)
#define set_stopped_child_used_math(child)	do { (child)->flags |= PF_USED_MATH; } while (0)
#define clear_used_math()			clear_stopped_child_used_math(current)
#define set_used_math()				set_stopped_child_used_math(current)

#define conditional_stopped_child_used_math(condition, child) \
	do { (child)->flags &= ~PF_USED_MATH, (child)->flags |= (condition) ? PF_USED_MATH : 0; } while (0)

#define copy_to_stopped_child_used_math(child) \
	do { (child)->flags &= ~PF_USED_MATH, (child)->flags |= current->flags & PF_USED_MATH; } while (0)

/* NOTE: this will return 0 or PF_USED_MATH, it will never return 1 */
#define tsk_used_math(p)			((p)->flags & PF_USED_MATH)
#define used_math()				tsk_used_math(current)
/**
 * task_nice - return the nice value of a given task.
 * @p: the task in question.
 *
 * Return: The nice value [ -20 ... 0 ... 19 ].
 */
static inline int task_nice(const struct task_struct *p)
{
	return PRIO_TO_NICE((p)->static_prio);
}
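
/*
 * Editorial example: PRIO_TO_NICE() subtracts DEFAULT_PRIO (120) from the
 * static priority, so static_prio 100 maps to nice -20, 120 to nice 0 and
 * 139 to nice +19; conceptually:
 *
 *	task_nice(p) == p->static_prio - 120;
 */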
/*
 * find a task by one of its numerical ids
 *
 * find_task_by_pid_ns():
 *      finds a task by its pid in the specified namespace
 * find_task_by_vpid():
 *      finds a task by its virtual pid
 *
 * see also find_vpid() etc in include/linux/pid.h
 */
/*
 * - Why not use task_lock()?
 *   User space can randomly change their names anyway, so locking for readers
 *   doesn't make sense. For writers, locking is probably necessary, as a race
 *   condition could lead to long-term mixed results.
 *   The strscpy_pad() in __set_task_comm() can ensure that the task comm is
 *   always NUL-terminated and zero-padded. Therefore the race condition between
 *   reader and writer is not an issue.
 *
 * - BUILD_BUG_ON() can help prevent the buf from being truncated.
 *   Since the callers don't perform any return value checks, this safeguard is
 *   necessary.
 */
#define get_task_comm(buf, tsk) ({			\
BUILD_BUG_ON(sizeof(buf) < TASK_COMM_LEN); \
strscpy_pad(buf, (tsk)->comm); \
buf; \
})
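
/*
 * Editorial usage sketch: the destination must be an actual array of at
 * least TASK_COMM_LEN bytes (a plain pointer would trip the BUILD_BUG_ON()
 * above, since sizeof(buf) would be the pointer size):
 *
 *	char comm[TASK_COMM_LEN];
 *
 *	pr_info("current task: %s\n", get_task_comm(comm, current));
 */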
static __always_inline void scheduler_ipi(void)
{
	/*
	 * Fold TIF_NEED_RESCHED into the preempt_count; anybody setting
	 * TIF_NEED_RESCHED remotely (for the first time) will also send
	 * this IPI.
	 */
preempt_fold_need_resched();
}
/*
 * Set thread flags in other task's structures.
 * See asm/thread_info.h for TIF_xxxx flags available:
 */
static inline void set_tsk_thread_flag(struct task_struct *tsk, int flag)
{
set_ti_thread_flag(task_thread_info(tsk), flag);
}
static inline void clear_tsk_thread_flag(struct task_struct *tsk, int flag)
{
clear_ti_thread_flag(task_thread_info(tsk), flag);
}
/*
 * cond_resched() and cond_resched_lock(): latency reduction via
 * explicit rescheduling in places that are safe. The return
 * value indicates whether a reschedule was done in fact.
 * cond_resched_lock() will drop the spinlock before scheduling.
 */
#if !defined(CONFIG_PREEMPTION) || defined(CONFIG_PREEMPT_DYNAMIC)
extern int __cond_resched(void);
#ifndef CONFIG_PREEMPT_RT
/*
 * Non RT kernels have an elevated preempt count due to the held lock,
 * but are not allowed to be inside a RCU read side critical section
 */
# define PREEMPT_LOCK_RESCHED_OFFSETS	PREEMPT_LOCK_OFFSET
#else
/*
 * spin/rw_lock() on RT implies rcu_read_lock(). The might_sleep() check in
 * cond_resched*lock() has to take that into account because it checks for
 * preempt_count() and rcu_preempt_depth().
 */
# define PREEMPT_LOCK_RESCHED_OFFSETS	\
	(PREEMPT_LOCK_OFFSET + (1U << MIGHT_RESCHED_RCU_SHIFT))
#endif
	WARN_ON_ONCE(!m);
	/* The task should only be setting itself as blocked */
	WARN_ON_ONCE(p != current);
	/* Currently we serialize blocked_on under the mutex::wait_lock */
	lockdep_assert_held_once(&m->wait_lock);
	/*
	 * Check that we don't overwrite an existing mutex value
	 * with a different mutex. Note, setting it to the same
	 * lock repeatedly is ok.
	 */
WARN_ON_ONCE(blocked_on && blocked_on != m);
WRITE_ONCE(p->blocked_on, m);
}
/* Currently we serialize blocked_on under the mutex::wait_lock */
	lockdep_assert_held_once(&m->wait_lock);
	/*
	 * There may be cases where we re-clear already cleared
	 * blocked_on relationships, but make sure we are not
	 * clearing the relationship with a different lock.
	 */
WARN_ON_ONCE(blocked_on && blocked_on != m);
}
WRITE_ONCE(p->blocked_on, NULL);
}