/* SPDX-License-Identifier: GPL-2.0 OR MIT */
/*
 * Copyright 2014-2022 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */
/*
 * When working with cp scheduler we should assign the HIQ manually or via
 * the amdgpu driver to a fixed hqd slot, here are the fixed HIQ hqd slot
 * definitions for Kaveri. In Kaveri only the first ME queues participates
 * in the cp scheduling taking that in mind we set the HIQ slot in the
 * second ME.
 */
#define KFD_CIK_HIQ_PIPE 4
#define KFD_CIK_HIQ_QUEUE 0

/*
 * Size of the per-process TBA+TMA buffer: 2 pages
 *
 * The first chunk is the TBA used for the CWSR ISA code. The second
 * chunk is used as TMA for user-mode trap handler setup in daisy-chain mode.
 */
#define KFD_CWSR_TBA_TMA_SIZE (PAGE_SIZE * 2)
#define KFD_CWSR_TMA_OFFSET (PAGE_SIZE + 2048)

/*
 * KFD_UNMAP_LATENCY_MS is the timeout CP waiting for SDMA preemption. One XCC
 * can be associated to 2 SDMA engines. queue_preemption_timeout_ms is the time
 * driver waiting for CP returning the UNMAP_QUEUE fence. Thus the math is
 * queue_preemption_timeout_ms = sdma_preemption_time * 2 + cp workload.
 * The formula here makes CP workload 10% of total timeout.
 */
#define KFD_UNMAP_LATENCY_MS \
	((queue_preemption_timeout_ms - queue_preemption_timeout_ms / 10) >> 1)

#define KFD_MAX_SDMA_QUEUES	128

/*
 * 512 = 0x200
 * The doorbell index distance between SDMA RLC (2*i) and (2*i+1) in the
 * same SDMA engine on SOC15, which has 8-byte doorbells for SDMA.
 * 512 8-byte doorbell distance (i.e. one page away) ensures that SDMA RLC
 * (2*i+1) doorbells (in terms of the lower 12 bit address) lie exactly in
 * the OFFSET and SIZE set in registers like BIF_SDMA0_DOORBELL_RANGE.
 */
#define KFD_QUEUE_DOORBELL_MIRROR_OFFSET 512
/**
 * enum kfd_ioctl_flags - KFD ioctl flags
 * Various flags that can be set in &amdkfd_ioctl_desc.flags to control how
 * userspace can use a given ioctl.
 */
enum kfd_ioctl_flags {
	/*
	 * @KFD_IOC_FLAG_CHECKPOINT_RESTORE:
	 * Certain KFD ioctls such as AMDKFD_IOC_CRIU_OP can potentially
	 * perform privileged operations and load arbitrary data into MQDs and
	 * eventually HQD registers when the queue is mapped by HWS. In order to
	 * prevent this we should perform additional security checks.
	 *
	 * This is equivalent to callers with the CHECKPOINT_RESTORE capability.
	 *
	 * Note: Since earlier versions of docker do not support
	 * CHECKPOINT_RESTORE, we also allow ioctls with SYS_ADMIN capability.
	 */
	KFD_IOC_FLAG_CHECKPOINT_RESTORE = BIT(0),
};

/*
 * Kernel module parameter to specify maximum number of supported queues per
 * device
*/ externint max_num_of_queues_per_device;
/* Kernel module parameter to specify the scheduling policy */ externint sched_policy;
/* * Kernel module parameter to specify the maximum process * number per HW scheduler
*/ externint hws_max_conc_proc;
externint cwsr_enable;
/* * Kernel module parameter to specify whether to send sigterm to HSA process on * unhandled exception
*/ externint send_sigterm;
/* * This kernel module is used to simulate large bar machine on non-large bar * enabled machines.
*/ externint debug_largebar;
/* Set sh_mem_config.retry_disable on GFX v9 */ externint amdgpu_noretry;
/* Halt if HWS hang is detected */ externint halt_if_hws_hang;
/* Whether MEC FW support GWS barriers */ externbool hws_gws_support;
/* Queue preemption timeout in ms */ externint queue_preemption_timeout_ms;
/* * Don't evict process queues on vm fault
*/ externint amdgpu_no_queue_eviction_on_vm_fault;
struct kfd_node { unsignedint node_id; struct amdgpu_device *adev; /* Duplicated here along with keeping * a copy in kfd_dev to save a hop
*/ conststruct kfd2kgd_calls *kfd2kgd; /* Duplicated here along with * keeping a copy in kfd_dev to * save a hop
*/ struct kfd_vmid_info vm_info; unsignedint id; /* topology stub index */
uint32_t xcc_mask; /* Instance mask of XCCs present */ struct amdgpu_xcp *xcp;
/* * Interrupts of interest to KFD are copied * from the HW ring into a SW ring.
*/ bool interrupts_active;
uint32_t interrupt_bitmap; /* Only used for GFX 9.4.3 */
/**
 * struct queue_properties
 *
 * @type: The queue type.
 *
 * @queue_id: Queue identifier.
 *
 * @queue_address: Queue ring buffer address.
 *
 * @queue_size: Queue ring buffer size.
 *
 * @priority: Defines the queue priority relative to other queues in the
 * process.
 * This is just an indication and HW scheduling may override the priority as
 * necessary while keeping the relative prioritization.
 * The priority granularity is from 0 to f, where f is the highest priority.
 * Currently all queues are initialized with the highest priority.
 *
 * @queue_percent: This field is partially implemented and currently a zero in
 * this field defines that the queue is non active.
 *
 * @read_ptr: User space address which points to the number of dwords the
 * cp read from the ring buffer. This field updates automatically by the H/W.
 *
 * @write_ptr: Defines the number of dwords written to the ring buffer.
 *
 * @doorbell_ptr: Notifies the H/W of new packet written to the queue ring
 * buffer. This field should be similar to write_ptr and the user should
 * update this field after updating the write_ptr.
 *
 * @doorbell_off: The doorbell offset in the doorbell pci-bar.
 *
 * @is_interop: Defines if this is an interop queue. Interop queue means that
 * the queue can access both graphics and compute resources.
 *
 * @is_evicted: Defines if the queue is evicted. Only active queues
 * are evicted, rendering them inactive.
 *
 * @is_active: Defines if the queue is active or not. @is_active and
 * @is_evicted are protected by the DQM lock.
 *
 * @is_gws: Defines if the queue has been updated to be GWS-capable or not.
 * @is_gws should be protected by the DQM lock, since changing it can yield the
 * possibility of updating DQM state on number of GWS queues.
 *
 * @vmid: If the scheduling mode is no cp scheduling the field defines the vmid
 * of the queue.
 *
 * This structure represents the queue properties for each queue no matter if
 * it's user mode or kernel mode queue.
 */
/* Flag values carried in struct mqd_update_info::update_flag below */
enum mqd_update_flag {
	UPDATE_FLAG_DBG_WA_ENABLE = 1,
	UPDATE_FLAG_DBG_WA_DISABLE = 2,
	UPDATE_FLAG_IS_GWS = 4, /* quirk for gfx9 IP */
};

/* Parameters passed to the MQD manager when updating an MQD */
struct mqd_update_info {
	union {
		struct {
			uint32_t count; /* Must be a multiple of 32 */
			uint32_t *ptr;
		} cu_mask;
	};
	enum mqd_update_flag update_flag;
};
/**
 * struct queue
 *
 * @list: Queue linked list.
 *
 * @mqd: The queue MQD (memory queue descriptor).
 *
 * @mqd_mem_obj: The MQD local gpu memory object.
 *
 * @gart_mqd_addr: The MQD gart mc address.
 *
 * @properties: The queue properties.
 *
 * @mec: Used only in no cp scheduling mode and identifies the micro engine id
 * that the queue should be executed on.
 *
 * @pipe: Used only in no cp scheduling mode and identifies the queue's pipe
 * id.
 *
 * @queue: Used only in no cp scheduling mode and identifies the queue's slot.
 *
 * @process: The kfd process that created this queue.
 *
 * @device: The kfd device that created this queue.
 *
 * @gws: Pointing to gws kgd_mem if this is a gws control queue; NULL
 * otherwise.
 *
 * This structure represents user mode compute queues.
 * It contains all the necessary data to handle such queues.
 */
/* This flag tells if we should reset all wavefronts on * process termination
*/ bool reset_wavefronts;
/* This flag tells us if this process has a GWS-capable * queue that will be mapped into the runlist. It's * possible to request a GWS BO, but not have the queue * currently mapped, and this changes how the MAP_PROCESS * PM4 packet is configured.
*/ bool mapped_gws_queue;
/* All the memory management data should be here too */
uint64_t gds_context_area; /* Contains page table flags such as AMDGPU_PTE_VALID since gfx9 */
uint64_t page_table_base;
uint32_t sh_mem_config;
uint32_t sh_mem_bases;
uint32_t sh_mem_ape1_base;
uint32_t sh_mem_ape1_limit;
uint32_t gds_size;
uint32_t num_gws;
uint32_t num_oac;
uint32_t sh_hidden_private_base;
/* doorbells for kfd process */ struct amdgpu_bo *proc_doorbells;
/* bitmap for dynamic doorbell allocation from the bo */ unsignedlong *doorbell_bitmap;
};
/* KFD Memory Eviction */

/* Approx. wait time before attempting to restore evicted BOs */
#define PROCESS_RESTORE_TIME_MS 100
/* Approx. back off time if restore fails due to lack of memory */
#define PROCESS_BACK_OFF_TIME_MS 100
/* Approx. time before evicting the process again */
#define PROCESS_ACTIVE_TIME_MS 10

/*
 * 8 byte handle containing GPU ID in the most significant 4 bytes and
 * idr_handle in the least significant 4 bytes
 */
#define MAKE_HANDLE(gpu_id, idr_handle) \
	(((uint64_t)(gpu_id) << 32) + (idr_handle))
/* Arguments parenthesized so callers may pass arbitrary expressions */
#define GET_GPU_ID(handle) ((handle) >> 32)
#define GET_IDR_HANDLE(handle) ((handle) & 0xFFFFFFFF)

/*
 * SDMA counter runs at 100MHz frequency.
 * We display SDMA activity in microsecond granularity in sysfs.
 * As a result, the divisor is 100.
 */
#define SDMA_ACTIVITY_DIVISOR 100
/* Data that is per-process-per device. */
struct kfd_process_device {
	/* The device that owns this data. */
	struct kfd_node *dev;

	/* The process that owns this kfd_process_device. */
	struct kfd_process *process;

	/* per-process-per device QCM data structure */
	struct qcm_process_device qpd;

	/*
	 * Flag used to tell the pdd has dequeued from the dqm.
	 * This is used to prevent dev->dqm->ops.process_termination() from
	 * being called twice when it is already called in IOMMU callback
	 * function.
	 */
	bool already_dequeued;
	bool runtime_inuse;

	/* Is this process/pasid bound to this device? (amd_iommu_bind_pasid) */
	enum kfd_pdd_bound bound;

	/*
	 * @cu_occupancy: Reports occupancy of Compute Units (CU) of a process
	 * that is associated with device encoded by "this" struct instance. The
	 * value reflects CU usage by all of the waves launched by this process
	 * on this device. A very important property of occupancy parameter is
	 * that its value is a snapshot of current use.
	 *
	 * Following is to be noted regarding how this parameter is reported:
	 *
	 * The number of waves that a CU can launch is limited by couple of
	 * parameters. These are encoded by struct amdgpu_cu_info instance
	 * that is part of every device definition. For GFX9 devices this
	 * translates to 40 waves (simd_per_cu * max_waves_per_simd) when waves
	 * do not use scratch memory and 32 waves (max_scratch_slots_per_cu)
	 * when they do use scratch memory. This could change for future
	 * devices and therefore this example should be considered as a guide.
	 *
	 * All CU's of a device are available for the process. This may not be
	 * true under certain conditions - e.g. CU masking.
	 *
	 * Finally number of CU's that are occupied by a process is affected by
	 * both number of CU's a device has along with number of other
	 * competing processes.
	 */
	struct attribute attr_cu_occupancy;

	/*
	 * If this process has been checkpointed before, then the user
	 * application will use the original gpu_id on the
	 * checkpointed node to refer to this device.
	 */
	uint32_t user_gpu_id;

	/*
	 * Default granularity to use in buffer migration
	 * and restoration of backing memory while handling
	 * recoverable page faults
	 */
	uint8_t default_granularity;
};
/* Process data */ struct kfd_process { /* * kfd_process are stored in an mm_struct*->kfd_process* * hash table (kfd_processes in kfd_process.c)
*/ struct hlist_node kfd_processes;
/* * Opaque pointer to mm_struct. We don't hold a reference to * it so it should never be dereferenced from here. This is * only used for looking up processes by their mm.
*/ void *mm;
struct kref ref; struct work_struct release_work;
struct mutex mutex;
/* * In any process, the thread that started main() is the lead * thread and outlives the rest. * It is here because amd_iommu_bind_pasid wants a task_struct. * It can also be used for safely getting a reference to the * mm_struct of the process.
*/ struct task_struct *lead_thread;
/* We want to receive a notification when the mm_struct is destroyed */ struct mmu_notifier mmu_notifier;
/* * Array of kfd_process_device pointers, * one for each device the process is using.
*/ struct kfd_process_device *pdds[MAX_GPU_INSTANCE];
uint32_t n_pdds;
struct process_queue_manager pqm;
/*Is the user space process 32 bit?*/ bool is_32bit_user_mode;
/* Information used for memory eviction */ void *kgd_process_info; /* Eviction fence that is attached to all the BOs of this process. The * fence will be triggered during eviction and new one will be created * during restore
*/ struct dma_fence __rcu *ef;
/* Work items for evicting and restoring BOs */ struct delayed_work eviction_work; struct delayed_work restore_work; /* seqno of the last scheduled eviction */ unsignedint last_eviction_seqno; /* Approx. the last timestamp (in jiffies) when the process was * restored after an eviction
*/ unsignedlong last_restore_timestamp;
/* Indicates device process is debug attached with reserved vmid. */ bool debug_trap_enabled;
/* If the process is a kfd debugger, we need to know so we can clean * up at exit time. If a process enables debugging on itself, it does * its own clean-up, so we don't set the flag here. We track this by * counting the number of processes this process is debugging.
*/
atomic_t debugged_process_count;
/* If the process is a debugged, this is the debugger process */ struct kfd_process *debugger_process;
int kfd_reserved_mem_mmap(struct kfd_node *dev, struct kfd_process *process, struct vm_area_struct *vma);
/* KFD process API for creating and translating handles */ int kfd_process_device_create_obj_handle(struct kfd_process_device *pdd, void *mem); void *kfd_process_device_translate_handle(struct kfd_process_device *p, int handle); void kfd_process_device_remove_obj_handle(struct kfd_process_device *pdd, int handle); struct kfd_process *kfd_lookup_process_by_pid(struct pid *pid);
/* CRIU */
/*
 * Need to increment KFD_CRIU_PRIV_VERSION each time a change is made to any of
 * the CRIU private structures:
 * kfd_criu_process_priv_data
 * kfd_criu_device_priv_data
 * kfd_criu_bo_priv_data
 * kfd_criu_queue_priv_data
 * kfd_criu_event_priv_data
 * kfd_criu_svm_range_priv_data
 */
/*
 * The first 4 bytes of kfd_criu_queue_priv_data, kfd_criu_event_priv_data,
 * kfd_criu_svm_range_priv_data is the object type
 */
enum kfd_criu_object_type {
	KFD_CRIU_OBJECT_TYPE_QUEUE,
	KFD_CRIU_OBJECT_TYPE_EVENT,
	KFD_CRIU_OBJECT_TYPE_SVM_RANGE,
};
/**
 * enum kfd_config_dequeue_wait_counts_cmd - Command for configuring
 * dequeue wait counts.
 *
 * @KFD_DEQUEUE_WAIT_INIT: Set optimized dequeue wait counts for
 * certain ASICs. For these ASICs, this is the default value used by RESET
 * @KFD_DEQUEUE_WAIT_RESET: Reset dequeue wait counts to the optimized value
 * for certain ASICs. For others set it to default hardware reset value
 * @KFD_DEQUEUE_WAIT_SET_SCH_WAVE: Set context switch latency wait
 */
enum kfd_config_dequeue_wait_counts_cmd {
	KFD_DEQUEUE_WAIT_INIT = 1,
	KFD_DEQUEUE_WAIT_RESET = 2,
	KFD_DEQUEUE_WAIT_SET_SCH_WAVE = 3
};
/* ASIC-specific PM4 packet builders used by the packet manager */
struct packet_manager_funcs {
	/* Support ASIC-specific packet formats for PM4 packets */
	int (*map_process)(struct packet_manager *pm, uint32_t *buffer,
			struct qcm_process_device *qpd);
	int (*runlist)(struct packet_manager *pm, uint32_t *buffer,
			uint64_t ib, size_t ib_size_in_dwords, bool chain);
	int (*set_resources)(struct packet_manager *pm, uint32_t *buffer,
			struct scheduling_resources *res);
	int (*map_queues)(struct packet_manager *pm, uint32_t *buffer,
			struct queue *q, bool is_static);
	int (*unmap_queues)(struct packet_manager *pm, uint32_t *buffer,
			enum kfd_unmap_queues_filter mode,
			uint32_t filter_param, bool reset);
	int (*config_dequeue_wait_counts)(struct packet_manager *pm,
			uint32_t *buffer,
			enum kfd_config_dequeue_wait_counts_cmd cmd,
			uint32_t value);
	int (*query_status)(struct packet_manager *pm, uint32_t *buffer,
			uint64_t fence_address, uint64_t fence_value);
	int (*release_mem)(uint64_t gpu_addr, uint32_t *buffer);

	/* Packet sizes */
	int map_process_size;
	int runlist_size;
	int set_resources_size;
	int map_queues_size;
	int unmap_queues_size;
	int config_dequeue_wait_counts_size;
	int query_status_size;
	int release_mem_size;
};
/* Debugfs */
void kfd_debugfs_init(void);
void kfd_debugfs_fini(void);
int kfd_debugfs_mqds_by_process(struct seq_file *m, void *data);
int pqm_debugfs_mqds(struct seq_file *m, void *data);
int kfd_debugfs_hqds_by_device(struct seq_file *m, void *data);
int dqm_debugfs_hqds(struct seq_file *m, void *data);
int kfd_debugfs_rls_by_device(struct seq_file *m, void *data);
int pm_debugfs_runlist(struct seq_file *m, void *data);

int kfd_debugfs_hang_hws(struct kfd_node *dev);
int pm_debugfs_hang_hws(struct packet_manager *pm);
int dqm_debugfs_hang_hws(struct device_queue_manager *dqm);
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereit gestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.