/**
 * DOC: Scheduler
 *
 * Mali CSF hardware adopts a firmware-assisted scheduling model, where
 * the firmware takes care of scheduling aspects, to some extent.
 *
 * The scheduling happens at the scheduling group level, each group
 * contains 1 to N queues (N is FW/hardware dependent, and exposed
 * through the firmware interface). Each queue is assigned a command
 * stream ring buffer, which serves as a way to get jobs submitted to
 * the GPU, among other things.
 *
 * The firmware can schedule a maximum of M groups (M is FW/hardware
 * dependent, and exposed through the firmware interface). Past
 * this maximum number of groups, the kernel must take care of
 * rotating the groups passed to the firmware so every group gets
 * a chance to have its queues scheduled for execution.
 *
 * The current implementation only supports kernel-mode queues.
 * In other terms, userspace doesn't have access to the ring-buffer.
 * Instead, userspace passes indirect command stream buffers that are
 * called from the queue ring-buffer by the kernel using a pre-defined
 * sequence of command stream instructions to ensure the userspace driver
 * always gets consistent results (cache maintenance,
 * synchronization, ...).
 *
 * We rely on the drm_gpu_scheduler framework to deal with job
 * dependencies and submission. As any other driver dealing with a
 * FW-scheduler, we use the 1:1 entity:scheduler mode, such that each
 * entity has its own job scheduler. When a job is ready to be executed
 * (all its dependencies are met), it is pushed to the appropriate
 * queue ring-buffer, and the group is scheduled for execution if it
 * wasn't already active.
 *
 * Kernel-side group scheduling is timeslice-based. When we have less
 * groups than there are slots, the periodic tick is disabled and we
 * just let the FW schedule the active groups. When there are more
 * groups than slots, we give each group a chance to execute stuff for
 * a given amount of time, and then re-evaluate and pick new groups
 * to schedule. The group selection algorithm is based on
 * priority+round-robin.
 *
 * Even though user-mode queues are out of the scope right now, the
 * current design takes them into account by avoiding any guess on the
 * group/queue state that would be based on information we wouldn't have
 * if userspace was in charge of the ring-buffer. That's also one of the
 * reasons we don't do 'cooperative' scheduling (encoding FW group slot
 * reservation as dma_fence that would be returned from the
 * drm_gpu_scheduler::prepare_job() hook, and treating group rotation as
 * a queue of waiters, ordered by job submission order). This approach
 * would work for kernel-mode queues, but would make user-mode queues a
 * lot more complicated to retrofit.
 */
/**
 * struct panthor_csg_slot - Command stream group slot
 *
 * Describes one FW group slot and the scheduling group bound to it.
 */
struct panthor_csg_slot {
	/** @group: Scheduling group bound to this slot. */
	struct panthor_group *group;

	/** @priority: Group priority. */
	u8 priority;

	/**
	 * @idle: True if the group bound to this slot is idle.
	 *
	 * A group is idle when none of its queues have anything waiting
	 * for execution, or when all its queues are blocked waiting on
	 * something (a synchronization object).
	 */
	bool idle;
};
/** @PANTHOR_CSG_PRIORITY_MEDIUM: Medium priority group. */
PANTHOR_CSG_PRIORITY_MEDIUM,
/** @PANTHOR_CSG_PRIORITY_HIGH: High priority group. */
PANTHOR_CSG_PRIORITY_HIGH,
/** * @PANTHOR_CSG_PRIORITY_RT: Real-time priority group. * * Real-time priority allows one to preempt scheduling of other * non-real-time groups. When such a group becomes executable, * it will evict the group with the lowest non-rt priority if * there's no free group slot available.
*/
PANTHOR_CSG_PRIORITY_RT,
/** @PANTHOR_CSG_PRIORITY_COUNT: Number of priority levels. */
PANTHOR_CSG_PRIORITY_COUNT,
};
/**
 * struct panthor_scheduler - Object used to manage the scheduler
 */
struct panthor_scheduler {
	/** @ptdev: Device. */
	struct panthor_device *ptdev;

	/**
	 * @wq: Workqueue used by our internal scheduler logic and
	 * drm_gpu_scheduler.
	 *
	 * Used for the scheduler tick, group updates, and any other FW
	 * event processing that can't happen in the threaded interrupt
	 * path. Also handed to the drm_gpu_scheduler instances embedded
	 * in panthor_queue.
	 */
	struct workqueue_struct *wq;

	/**
	 * @heap_alloc_wq: Workqueue used to schedule tiler_oom works.
	 *
	 * Heap chunk allocation works get their own queue so that an
	 * allocation entering memory reclaim cannot stall the rest of
	 * the scheduler.
	 */
	struct workqueue_struct *heap_alloc_wq;

	/** @tick_work: Work executed on a scheduling tick. */
	struct delayed_work tick_work;

	/**
	 * @sync_upd_work: Work used to process synchronization object updates.
	 *
	 * Unblocks queues/groups that were waiting on a synchronization
	 * object.
	 */
	struct work_struct sync_upd_work;

	/**
	 * @fw_events_work: Work used to process FW events outside the interrupt path.
	 *
	 * Even though the interrupt is threaded, any event processing that
	 * needs panthor_scheduler::lock must run outside the interrupt
	 * path, otherwise we'd block the tick logic when it calls
	 * panthor_fw_{csg,wait}_wait_acks(). Since most event processing
	 * needs this lock, all FW event processing is simply delegated to
	 * the scheduler workqueue.
	 */
	struct work_struct fw_events_work;

	/**
	 * @resched_target: When the next tick should occur.
	 *
	 * Expressed in jiffies.
	 */
	u64 resched_target;

	/**
	 * @last_tick: When the last tick occurred.
	 *
	 * Expressed in jiffies.
	 */
	u64 last_tick;

	/** @tick_period: Tick period in jiffies. */
	u64 tick_period;

	/**
	 * @lock: Lock protecting access to all the scheduler fields.
	 *
	 * Must be held in the tick work, in the irq handler, and anywhere
	 * the @groups fields are touched.
	 */
	struct mutex lock;

	/** @groups: Various lists used to classify groups. */
	struct {
		/**
		 * @runnable: Runnable group lists.
		 *
		 * A group whose queues want to execute something has its
		 * panthor_group::run_node inserted here.
		 *
		 * One list per priority level.
		 */
		struct list_head runnable[PANTHOR_CSG_PRIORITY_COUNT];

		/**
		 * @idle: Idle group lists.
		 *
		 * When every queue of a group is idle (nothing to execute,
		 * or blocked), the panthor_group::run_node field is
		 * inserted here instead.
		 *
		 * One list per priority level.
		 */
		struct list_head idle[PANTHOR_CSG_PRIORITY_COUNT];

		/**
		 * @waiting: List of groups whose queues are blocked on a
		 * synchronization object.
		 *
		 * panthor_group::wait_node is inserted here when a group
		 * waits for synchronization objects to be signaled.
		 *
		 * This list is evaluated in the @sync_upd_work work.
		 */
		struct list_head waiting;
	} groups;

	/** @csg_slot_count: Number of command stream group slots exposed by the FW. */
	u32 csg_slot_count;

	/** @cs_slot_count: Number of command stream slots per group slot exposed by the FW. */
	u32 cs_slot_count;

	/** @as_slot_count: Number of address space slots supported by the MMU. */
	u32 as_slot_count;

	/** @used_csg_slot_count: Number of command stream group slots currently used. */
	u32 used_csg_slot_count;

	/** @sb_slot_count: Number of scoreboard slots. */
	u32 sb_slot_count;

	/**
	 * @might_have_idle_groups: True if an active group might have become idle.
	 *
	 * Forces a tick, so other runnable groups can be scheduled if one
	 * or more active groups became idle.
	 */
	bool might_have_idle_groups;

	/** @pm: Power management related fields. */
	struct {
		/** @has_ref: True if the scheduler owns a runtime PM reference. */
		bool has_ref;
	} pm;

	/** @reset: Reset related fields. */
	struct {
		/** @lock: Lock protecting the other reset fields. */
		struct mutex lock;

		/**
		 * @in_progress: True if a reset is in progress.
		 *
		 * Set to true in panthor_sched_pre_reset() and back to
		 * false in panthor_sched_post_reset().
		 */
		atomic_t in_progress;

		/**
		 * @stopped_groups: List containing all groups that were
		 * stopped before a reset.
		 *
		 * panthor_group::run_node is inserted here in the
		 * pre_reset path.
		 */
		struct list_head stopped_groups;
	} reset;
};
/** * @status: Status. * * Not zero on failure.
*/
u32 status;
/** @pad: MBZ. */
u32 pad;
};
/**
 * struct panthor_queue - Execution queue
 *
 * NOTE(review): this definition was corrupted during extraction: the nested
 * "struct {" opener for the @iface block, the "} syncwait;" closer and the
 * "struct {" opener for @fence_ctx are all missing, so the braces below do
 * not pair up the way the member names suggest. Only comments were touched
 * here; the code must be repaired against the original file.
 */
struct panthor_queue {
	/** @scheduler: DRM scheduler used for this queue. */
	struct drm_gpu_scheduler scheduler;

	/** @entity: DRM scheduling entity used for this queue. */
	struct drm_sched_entity entity;

	/**
	 * @remaining_time: Time remaining before the job timeout expires.
	 *
	 * The job timeout is suspended when the queue is not scheduled by the
	 * FW. Every time we suspend the timer, we need to save the remaining
	 * time so we can restore it later on.
	 *
	 * NOTE(review): "unsignedlong" is a fused token (should read
	 * "unsigned long") -- this line does not compile as-is.
	 */
	unsignedlong remaining_time;

	/** @timeout_suspended: True if the job timeout was suspended. */
	bool timeout_suspended;

	/**
	 * @doorbell_id: Doorbell assigned to this queue.
	 *
	 * Right now, all groups share the same doorbell, and the doorbell ID
	 * is assigned to group_slot + 1 when the group is assigned a slot. But
	 * we might decide to provide fine grained doorbell assignment at some
	 * point, so don't have to wake up all queues in a group every time one
	 * of them is updated.
	 */
	u8 doorbell_id;

	/**
	 * @priority: Priority of the queue inside the group.
	 *
	 * Must be less than 16 (Only 4 bits available).
	 */
	u8 priority;
#define CSF_MAX_QUEUE_PRIO GENMASK(3, 0)

	/* NOTE(review): a "struct {" opener for the @iface firmware-interface
	 * sub-struct is missing before the two members below, so the
	 * "} iface;" line actually closes struct panthor_queue itself.
	 */

	/** @input_fw_va: FW virtual address of the input interface buffer. */
	u32 input_fw_va;

	/** @output_fw_va: FW virtual address of the output interface buffer. */
	u32 output_fw_va;
	} iface;

	/**
	 * @syncwait: Stores information about the synchronization object this
	 * queue is waiting on.
	 */
	struct {
		/** @gpu_va: GPU address of the synchronization object. */
		u64 gpu_va;

		/** @ref: Reference value to compare against. */
		u64 ref;

		/** @gt: True if this is a greater-than test. */
		bool gt;

		/** @sync64: True if this is a 64-bit sync object. */
		bool sync64;

		/* NOTE(review): the "} syncwait;" closer and the "struct {"
		 * opener for @fence_ctx are missing here; @seqno onward
		 * belong to @fence_ctx, not @syncwait.
		 */

		/** @seqno: Sequence number of the last initialized fence. */
		atomic64_t seqno;

		/**
		 * @last_fence: Fence of the last submitted job.
		 *
		 * We return this fence when we get an empty command stream.
		 * This way, we are guaranteed that all earlier jobs have completed
		 * when drm_sched_job::s_fence::finished without having to feed
		 * the CS ring buffer with a dummy job that only signals the fence.
		 */
		struct dma_fence *last_fence;

		/**
		 * @in_flight_jobs: List containing all in-flight jobs.
		 *
		 * Used to keep track and signal panthor_job::done_fence when the
		 * synchronization object attached to the queue is signaled.
		 */
		struct list_head in_flight_jobs;
	} fence_ctx;

	/** @profiling: Job profiling data slots and access information. */
	struct {
		/** @slots: Kernel BO holding the slots. */
		struct panthor_kernel_bo *slots;

		/** @slot_count: Number of jobs ringbuffer can hold at once. */
		u32 slot_count;

		/** @seqno: Index of the next available profiling information slot. */
		u32 seqno;
	} profiling;
};
/**
 * enum panthor_group_state - Scheduling group state.
 */
enum panthor_group_state {
	/** @PANTHOR_CS_GROUP_CREATED: Group was created, but not scheduled yet. */
	PANTHOR_CS_GROUP_CREATED,

	/** @PANTHOR_CS_GROUP_ACTIVE: Group is currently scheduled. */
	PANTHOR_CS_GROUP_ACTIVE,

	/**
	 * @PANTHOR_CS_GROUP_SUSPENDED: Group was scheduled at least once,
	 * but is inactive/suspended right now.
	 */
	PANTHOR_CS_GROUP_SUSPENDED,

	/**
	 * @PANTHOR_CS_GROUP_TERMINATED: Group was terminated.
	 *
	 * Can no longer be scheduled. The only allowed action is a
	 * destruction.
	 */
	PANTHOR_CS_GROUP_TERMINATED,

	/**
	 * @PANTHOR_CS_GROUP_UNKNOWN_STATE: Group is in an unknown state.
	 *
	 * The FW returned an inconsistent state. The group is flagged
	 * unusable and can no longer be scheduled. The only allowed
	 * action is a destruction.
	 *
	 * When that happens, we also schedule a FW reset, to start from
	 * a fresh state.
	 */
	PANTHOR_CS_GROUP_UNKNOWN_STATE,
};
/** @vm: VM bound to the group. */ struct panthor_vm *vm;
/** @compute_core_mask: Mask of shader cores that can be used for compute jobs. */
u64 compute_core_mask;
/** @fragment_core_mask: Mask of shader cores that can be used for fragment jobs. */
u64 fragment_core_mask;
/** @tiler_core_mask: Mask of tiler cores that can be used for tiler jobs. */
u64 tiler_core_mask;
/** @max_compute_cores: Maximum number of shader cores used for compute jobs. */
u8 max_compute_cores;
/** @max_fragment_cores: Maximum number of shader cores used for fragment jobs. */
u8 max_fragment_cores;
/** @max_tiler_cores: Maximum number of tiler cores used for tiler jobs. */
u8 max_tiler_cores;
/** @priority: Group priority (check panthor_csg_priority). */
u8 priority;
/** @blocked_queues: Bitmask reflecting the blocked queues. */
u32 blocked_queues;
/** @idle_queues: Bitmask reflecting the idle queues. */
u32 idle_queues;
/** @fatal_lock: Lock used to protect access to fatal fields. */
spinlock_t fatal_lock;
/** @fatal_queues: Bitmask reflecting the queues that hit a fatal exception. */
u32 fatal_queues;
/** @tiler_oom: Mask of queues that have a tiler OOM event to process. */
atomic_t tiler_oom;
/** @queue_count: Number of queues in this group. */
u32 queue_count;
/** @queues: Queues owned by this group. */ struct panthor_queue *queues[MAX_CS_PER_CSG];
/** * @csg_id: ID of the FW group slot. * * -1 when the group is not scheduled/active.
*/ int csg_id;
/** * @destroyed: True when the group has been destroyed. * * If a group is destroyed it becomes useless: no further jobs can be submitted * to its queues. We simply wait for all references to be dropped so we can * release the group object.
*/ bool destroyed;
/** * @timedout: True when a timeout occurred on any of the queues owned by * this group. * * Timeouts can be reported by drm_sched or by the FW. If a reset is required, * and the group can't be suspended, this also leads to a timeout. In any case, * any timeout situation is unrecoverable, and the group becomes useless. We * simply wait for all references to be dropped so we can release the group * object.
*/ bool timedout;
/** * @innocent: True when the group becomes unusable because the group suspension * failed during a reset. * * Sometimes the FW was put in a bad state by other groups, causing the group * suspension happening in the reset path to fail. In that case, we consider the * group innocent.
*/ bool innocent;
/** * @syncobjs: Pool of per-queue synchronization objects. * * One sync object per queue. The position of the sync object is * determined by the queue index.
*/ struct panthor_kernel_bo *syncobjs;
/** @fdinfo: Per-file info exposed through /proc/<process>/fdinfo */ struct { /** @data: Total sampled values for jobs in queues from this group. */ struct panthor_gpu_usage data;
/** * @fdinfo.lock: Spinlock to govern concurrent access from drm file's fdinfo * callback and job post-completion processing function
*/
spinlock_t lock;
/** @fdinfo.kbo_sizes: Aggregate size of private kernel BO's held by the group. */
size_t kbo_sizes;
} fdinfo;
/** @state: Group state. */ enum panthor_group_state state;
/** * @suspend_buf: Suspend buffer. * * Stores the state of the group and its queues when a group is suspended. * Used at resume time to restore the group in its previous state. * * The size of the suspend buffer is exposed through the FW interface.
*/ struct panthor_kernel_bo *suspend_buf;
/** * @protm_suspend_buf: Protection mode suspend buffer. * * Stores the state of the group and its queues when a group that's in * protection mode is suspended. * * Used at resume time to restore the group in its previous state. * * The size of the protection mode suspend buffer is exposed through the * FW interface.
*/ struct panthor_kernel_bo *protm_suspend_buf;
/** @sync_upd_work: Work used to check/signal job fences. */ struct work_struct sync_upd_work;
/** @tiler_oom_work: Work used to process tiler OOM events happening on this group. */ struct work_struct tiler_oom_work;
/** @term_work: Work used to finish the group termination procedure. */ struct work_struct term_work;
/** * @release_work: Work used to release group resources. * * We need to postpone the group release to avoid a deadlock when * the last ref is released in the tick work.
*/ struct work_struct release_work;
/** * @run_node: Node used to insert the group in the * panthor_group::groups::{runnable,idle} and * panthor_group::reset.stopped_groups lists.
*/ struct list_head run_node;
/** * @wait_node: Node used to insert the group in the * panthor_group::groups::waiting list.
*/ struct list_head wait_node;
};
/**
 * group_queue_work() - Queue a group work
 * @group: Group to queue the work for.
 * @wname: Work name.
 *
 * Grabs a ref and queues a work item to the scheduler workqueue. If
 * the work was already queued, we release the reference we grabbed.
 *
 * Work callbacks must release the reference we grabbed here.
 */
#define group_queue_work(group, wname) \
	do { \
		group_get(group); \
		if (!queue_work((group)->ptdev->scheduler->wq, &(group)->wname ## _work)) \
			group_put(group); \
	} while (0)
/**
 * sched_queue_work() - Queue a scheduler work.
 * @sched: Scheduler object.
 * @wname: Work name.
 *
 * Conditionally queues a scheduler work if no reset is pending/in-progress.
 */
#define sched_queue_work(sched, wname) \
	do { \
		if (!atomic_read(&(sched)->reset.in_progress) && \
		    !panthor_device_reset_is_pending((sched)->ptdev)) \
			queue_work((sched)->wq, &(sched)->wname ## _work); \
	} while (0)
/**
 * sched_queue_delayed_work() - Queue a scheduler delayed work.
 * @sched: Scheduler object.
 * @wname: Work name.
 * @delay: Work delay in jiffies.
 *
 * Conditionally queues a scheduler delayed work if no reset is
 * pending/in-progress.
 */
#define sched_queue_delayed_work(sched, wname, delay) \
	do { \
		/* (sched) parenthesized for macro-argument hygiene, matching \
		 * sched_queue_work() above. \
		 */ \
		if (!atomic_read(&(sched)->reset.in_progress) && \
		    !panthor_device_reset_is_pending((sched)->ptdev)) \
			mod_delayed_work((sched)->wq, &(sched)->wname ## _work, delay); \
	} while (0)
/*
 * The maximum number of groups per file is currently an arbitrary low
 * value; bump it if more are ever needed.
 */
#define MAX_GROUPS_PER_POOL 128
/**
 * struct panthor_group_pool - Group pool
 *
 * Each file gets assigned a group pool.
 */
struct panthor_group_pool {
	/** @xa: Xarray used to manage group handles. */
	struct xarray xa;
};
/** * struct panthor_job - Used to manage GPU job
*/ struct panthor_job { /** @base: Inherit from drm_sched_job. */ struct drm_sched_job base;
/** @group: Group of the queue this job will be pushed to. */ struct panthor_group *group;
/** @queue_idx: Index of the queue inside @group. */
u32 queue_idx;
/** @call_info: Information about the userspace command stream call. */ struct { /** @start: GPU address of the userspace command stream. */
u64 start;
/** @size: Size of the userspace command stream. */
u32 size;
/** * @latest_flush: Flush ID at the time the userspace command * stream was built. * * Needed for the flush reduction mechanism.
*/
u32 latest_flush;
} call_info;
/** @ringbuf: Position of this job is in the ring buffer. */ struct { /** @start: Start offset. */
u64 start;
/** @end: End offset. */
u64 end;
} ringbuf;
/** * @node: Used to insert the job in the panthor_queue::fence_ctx::in_flight_jobs * list.
*/ struct list_head node;
/** @done_fence: Fence signaled when the job is finished or cancelled. */ struct dma_fence *done_fence;
if (queue->syncwait.kmap) return queue->syncwait.kmap + queue->syncwait.offset;
bo = panthor_vm_get_bo_for_va(group->vm,
queue->syncwait.gpu_va,
&queue->syncwait.offset); if (drm_WARN_ON(&ptdev->base, IS_ERR_OR_NULL(bo))) goto err_put_syncwait_obj;
queue->syncwait.obj = &bo->base.base;
ret = drm_gem_vmap(queue->syncwait.obj, &map); if (drm_WARN_ON(&ptdev->base, ret)) goto err_put_syncwait_obj;
queue->syncwait.kmap = map.vaddr; if (drm_WARN_ON(&ptdev->base, !queue->syncwait.kmap)) goto err_put_syncwait_obj;
/* Dummy doorbell allocation: doorbell is assigned to the group and * all queues use the same doorbell. * * TODO: Implement LRU-based doorbell assignment, so the most often * updated queues get their own doorbell, thus avoiding useless checks * on queues belonging to the same group that are rarely updated.
*/ for (u32 i = 0; i < group->queue_count; i++)
group->queues[i]->doorbell_id = csg_id + 1;
csg_slot->group = group;
return 0;
}
/** * group_unbind_locked() - Unbind a group from a slot. * @group: Group to unbind. * * Return: 0 on success, a negative error code otherwise.
*/ staticint
group_unbind_locked(struct panthor_group *group)
{ struct panthor_device *ptdev = group->ptdev; struct panthor_csg_slot *slot;
/* Tiler OOM events will be re-issued next time the group is scheduled. */
atomic_set(&group->tiler_oom, 0);
cancel_work(&group->tiler_oom_work);
for (u32 i = 0; i < group->queue_count; i++)
group->queues[i]->doorbell_id = -1;
slot->group = NULL;
group_put(group); return 0;
}
/** * cs_slot_prog_locked() - Program a queue slot * @ptdev: Device. * @csg_id: Group slot ID. * @cs_id: Queue slot ID. * * Program a queue slot with the queue information so things can start being * executed on this queue. * * The group slot must have a group bound to it already (group_bind_locked()).
*/ staticvoid
cs_slot_prog_locked(struct panthor_device *ptdev, u32 csg_id, u32 cs_id)
{ struct panthor_queue *queue = ptdev->scheduler->csg_slots[csg_id].group->queues[cs_id]; struct panthor_fw_cs_iface *cs_iface = panthor_fw_get_cs_iface(ptdev, csg_id, cs_id);
/**
 * cs_slot_reset_locked() - Reset a queue slot
 * @ptdev: Device.
 * @csg_id: Group slot.
 * @cs_id: Queue slot.
 *
 * Change the queue slot state to STOP and suspend the queue timeout if
 * the queue is not blocked.
 *
 * The group slot must have a group bound to it (group_bind_locked()).
 *
 * Return: 0 (always succeeds).
 */
static int
cs_slot_reset_locked(struct panthor_device *ptdev, u32 csg_id, u32 cs_id)
{
	struct panthor_fw_cs_iface *cs_iface = panthor_fw_get_cs_iface(ptdev, csg_id, cs_id);
	struct panthor_group *group = ptdev->scheduler->csg_slots[csg_id].group;
	struct panthor_queue *queue = group->queues[cs_id];

	/* Fix: cs_iface was computed but never used, while the kernel-doc
	 * promises a STOP transition. Request the STOP state on the queue
	 * slot (panthor_fw_update_reqs() is the request helper used
	 * elsewhere in this file).
	 * NOTE(review): CS_STATE_STOP/CS_STATE_MASK come from the FW
	 * interface header -- confirm the exact arguments against it.
	 */
	panthor_fw_update_reqs(cs_iface, req,
			       CS_STATE_STOP,
			       CS_STATE_MASK);

	/* If the queue is blocked, we want to keep the timeout running, so
	 * we can detect unbounded waits and kill the group when that happens.
	 */
	if (!(group->blocked_queues & BIT(cs_id)) && !queue->timeout_suspended) {
		queue->remaining_time = drm_sched_suspend_timeout(&queue->scheduler);
		queue->timeout_suspended = true;

		WARN_ON(queue->remaining_time > msecs_to_jiffies(JOB_TIMEOUT_MS));
	}

	return 0;
}
/** * csg_slot_sync_priority_locked() - Synchronize the group slot priority * @ptdev: Device. * @csg_id: Group slot ID. * * Group slot priority update happens asynchronously. When we receive a * %CSG_ENDPOINT_CONFIG, we know the update is effective, and can * reflect it to our panthor_csg_slot object.
*/ staticvoid
csg_slot_sync_priority_locked(struct panthor_device *ptdev, u32 csg_id)
{ struct panthor_csg_slot *csg_slot = &ptdev->scheduler->csg_slots[csg_id]; struct panthor_fw_csg_iface *csg_iface;
/** * cs_slot_sync_queue_state_locked() - Synchronize the queue slot priority * @ptdev: Device. * @csg_id: Group slot. * @cs_id: Queue slot. * * Queue state is updated on group suspend or STATUS_UPDATE event.
*/ staticvoid
cs_slot_sync_queue_state_locked(struct panthor_device *ptdev, u32 csg_id, u32 cs_id)
{ struct panthor_group *group = ptdev->scheduler->csg_slots[csg_id].group; struct panthor_queue *queue = group->queues[cs_id]; struct panthor_fw_cs_iface *cs_iface =
panthor_fw_get_cs_iface(group->ptdev, csg_id, cs_id);
u32 status_wait_cond;
switch (cs_iface->output->status_blocked_reason) { case CS_STATUS_BLOCKED_REASON_UNBLOCKED: if (queue->iface.input->insert == queue->iface.output->extract &&
cs_iface->output->status_scoreboards == 0)
group->idle_queues |= BIT(cs_id); break;
case CS_STATUS_BLOCKED_REASON_SYNC_WAIT: if (list_empty(&group->wait_node)) {
list_move_tail(&group->wait_node,
&group->ptdev->scheduler->groups.waiting);
}
/* The queue is only blocked if there's no deferred operation * pending, which can be checked through the scoreboard status.
*/ if (!cs_iface->output->status_scoreboards)
group->blocked_queues |= BIT(cs_id);
csg_iface = panthor_fw_get_csg_iface(ptdev, csg_id);
group = csg_slot->group;
if (!group) return;
old_state = group->state;
csg_state = csg_iface->output->ack & CSG_STATE_MASK; switch (csg_state) { case CSG_STATE_START: case CSG_STATE_RESUME:
new_state = PANTHOR_CS_GROUP_ACTIVE; break; case CSG_STATE_TERMINATE:
new_state = PANTHOR_CS_GROUP_TERMINATED; break; case CSG_STATE_SUSPEND:
new_state = PANTHOR_CS_GROUP_SUSPENDED; break; default: /* The unknown state might be caused by a FW state corruption, * which means the group metadata can't be trusted anymore, and * the SUSPEND operation might propagate the corruption to the * suspend buffers. Flag the group state as unknown to make * sure it's unusable after that point.
*/
drm_err(&ptdev->base, "Invalid state on CSG %d (state=%d)",
csg_id, csg_state);
new_state = PANTHOR_CS_GROUP_UNKNOWN_STATE; break;
}
if (old_state == new_state) return;
/* The unknown state might be caused by a FW issue, reset the FW to * take a fresh start.
*/ if (new_state == PANTHOR_CS_GROUP_UNKNOWN_STATE)
panthor_device_schedule_reset(ptdev);
if (new_state == PANTHOR_CS_GROUP_SUSPENDED)
csg_slot_sync_queues_state_locked(ptdev, csg_id);
if (old_state == PANTHOR_CS_GROUP_ACTIVE) {
u32 i;
/* Reset the queue slots so we start from a clean * state when starting/resuming a new group on this * CSG slot. No wait needed here, and no ringbell * either, since the CS slot will only be re-used * on the next CSG start operation.
*/ for (i = 0; i < group->queue_count; i++) { if (group->queues[i])
cs_slot_reset_locked(ptdev, csg_id, i);
}
}
if (CS_EXCEPTION_TYPE(fatal) == DRM_PANTHOR_EXCEPTION_CS_UNRECOVERABLE) { /* If this exception is unrecoverable, queue a reset, and make * sure we stop scheduling groups until the reset has happened.
*/
panthor_device_schedule_reset(ptdev);
cancel_delayed_work(&sched->tick_work);
} else {
sched_queue_delayed_work(sched, tick, 0);
}
/* The group got scheduled out, we stop here. We will get a new tiler OOM event * when it's scheduled again.
*/ if (unlikely(csg_id < 0)) return 0;
if (IS_ERR(heaps) || frag_end > vt_end || vt_end >= vt_start) {
ret = -EINVAL;
} else { /* We do the allocation without holding the scheduler lock to avoid * blocking the scheduling.
*/
ret = panthor_heap_grow(heaps, heap_address,
renderpasses_in_flight,
pending_frag_count, &new_chunk_va);
}
/* If the heap context doesn't have memory for us, we want to let the * FW try to reclaim memory by waiting for fragment jobs to land or by * executing the tiler OOM exception handler, which is supposed to * implement incremental rendering.
*/ if (ret && ret != -ENOMEM) {
drm_warn(&ptdev->base, "Failed to extend the tiler heap\n");
group->fatal_queues |= BIT(cs_id);
sched_queue_delayed_work(sched, tick, 0); goto out_put_heap_pool;
}
/* We allocated a chunck, but couldn't link it to the heap * context because the group was scheduled out while we were * allocating memory. We need to return this chunk to the heap.
*/ if (unlikely(csg_id < 0 && new_chunk_va))
panthor_heap_return_chunk(heaps, heap_address, new_chunk_va);
/* We don't use group_queue_work() here because we want to queue the * work item to the heap_alloc_wq.
*/
group_get(group); if (!queue_work(sched->heap_alloc_wq, &group->tiler_oom_work))
group_put(group);
}
if (events & CS_FATAL)
cs_slot_process_fatal_event_locked(ptdev, csg_id, cs_id);
if (events & CS_FAULT)
cs_slot_process_fault_event_locked(ptdev, csg_id, cs_id);
if (events & CS_TILER_OOM)
cs_slot_process_tiler_oom_event_locked(ptdev, csg_id, cs_id);
/* We don't acknowledge the TILER_OOM event since its handling is * deferred to a separate work.
*/
panthor_fw_update_reqs(cs_iface, req, ack, CS_FATAL | CS_FAULT);
/* Schedule a tick so we can evict idle groups and schedule non-idle * ones. This will also update runtime PM and devfreq busy/idle states, * so the device can lower its frequency or get suspended.
*/
sched_queue_delayed_work(sched, tick, 0);
}
/* There may not be any pending CSG/CS interrupts to process */ if (req == ack && cs_irq_req == cs_irq_ack) return;
/* Immediately set IRQ_ACK bits to be same as the IRQ_REQ bits before * examining the CS_ACK & CS_REQ bits. This would ensure that Host * doesn't miss an interrupt for the CS in the race scenario where * whilst Host is servicing an interrupt for the CS, firmware sends * another interrupt for that CS.
*/
csg_iface->input->cs_irq_ack = cs_irq_req;
/* Acknowledge the idle event and schedule a tick. */
panthor_fw_update_reqs(glb_iface, req, glb_iface->output->ack, GLB_IDLE);
sched_queue_delayed_work(ptdev->scheduler, tick, 0);
}
/** * sched_process_global_irq_locked() - Process the scheduling part of a global IRQ * @ptdev: Device.
*/ staticvoid sched_process_global_irq_locked(struct panthor_device *ptdev)
{ struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev);
u32 req, ack, evts;
if (!full_tick) {
list_add_tail(&group->run_node, &ctx->old_groups[group->priority]); return;
}
/* Rotate to make sure groups with lower CSG slot * priorities have a chance to get a higher CSG slot * priority next time they get picked. This priority * has an impact on resource request ordering, so it's * important to make sure we don't let one group starve * all other groups with the same group priority.
*/
list_for_each_entry(other_group,
&ctx->old_groups[csg_slot->group->priority],
run_node) { struct panthor_csg_slot *other_csg_slot = &sched->csg_slots[other_group->csg_id];
if (other_csg_slot->priority > csg_slot->priority) {
list_add_tail(&csg_slot->group->run_node, &other_group->run_node); return;
}
}
ctx->min_priority = PANTHOR_CSG_PRIORITY_COUNT; for (i = 0; i < ARRAY_SIZE(ctx->groups); i++) {
INIT_LIST_HEAD(&ctx->groups[i]);
INIT_LIST_HEAD(&ctx->old_groups[i]);
}
for (i = 0; i < sched->csg_slot_count; i++) { struct panthor_csg_slot *csg_slot = &sched->csg_slots[i]; struct panthor_group *group = csg_slot->group; struct panthor_fw_csg_iface *csg_iface;
/* If there was unhandled faults on the VM, force processing of * CSG IRQs, so we can flag the faulty queue.
*/ if (panthor_vm_has_unhandled_faults(group->vm)) {
sched_process_csg_irq_locked(ptdev, i);
/* No fatal fault reported, flag all queues as faulty. */ if (!group->fatal_queues)
group->fatal_queues |= GENMASK(group->queue_count - 1, 0);
}
for (i = 0; i < ARRAY_SIZE(ctx->old_groups); i++) {
list_for_each_entry_safe(group, tmp, &ctx->old_groups[i], run_node) { /* If everything went fine, we should only have groups * to be terminated in the old_groups lists.
*/
drm_WARN_ON(&ptdev->base, !ctx->csg_upd_failed_mask &&
group_can_run(group));
for (i = 0; i < ARRAY_SIZE(ctx->groups); i++) { /* If everything went fine, the groups to schedule lists should * be empty.
*/
drm_WARN_ON(&ptdev->base,
!ctx->csg_upd_failed_mask && !list_empty(&ctx->groups[i]));
ret = csgs_upd_ctx_apply_locked(ptdev, &upd_ctx); if (ret) {
panthor_device_schedule_reset(ptdev);
ctx->csg_upd_failed_mask |= upd_ctx.timedout_mask; return;
}
/* Unbind evicted groups. */ for (prio = PANTHOR_CSG_PRIORITY_COUNT - 1; prio >= 0; prio--) {
list_for_each_entry(group, &ctx->old_groups[prio], run_node) { /* This group is gone. Process interrupts to clear * any pending interrupts before we start the new * group.
*/ if (group->csg_id >= 0)
sched_process_csg_irq_locked(ptdev, group->csg_id);
group_unbind_locked(group);
}
}
for (i = 0; i < sched->csg_slot_count; i++) { if (!sched->csg_slots[i].group)
free_csg_slots |= BIT(i);
}
/* If the group has been destroyed while we were * scheduling, ask for an immediate tick to * re-evaluate as soon as possible and get rid of * this dangling group.
*/ if (group->destroyed)
ctx->immediate_tick = true;
group_put(group);
}
/* Return evicted groups to the idle or run queues. Groups * that can no longer be run (because they've been destroyed * or experienced an unrecoverable error) will be scheduled * for destruction in tick_ctx_cleanup().
*/
list_for_each_entry_safe(group, tmp, &ctx->old_groups[prio], run_node) { if (!group_can_run(group)) continue;
if (group_is_idle(group))
list_move_tail(&group->run_node, &sched->groups.idle[prio]); else
--> --------------------
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereit gestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.