/* amdgpu_drm.h -- Public header for the amdgpu driver -*- linux-c -*- * * Copyright 2000 Precision Insight, Inc., Cedar Park, Texas. * Copyright 2000 VA Linux Systems, Inc., Fremont, California. * Copyright 2002 Tungsten Graphics, Inc., Cedar Park, Texas. * Copyright 2014 Advanced Micro Devices, Inc. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), * to deal in the Software without restriction, including without limitation * the rights to use, copy, modify, merge, publish, distribute, sublicense, * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * OTHER DEALINGS IN THE SOFTWARE. * * Authors: * Kevin E. Martin <martin@valinux.com> * Gareth Hughes <gareth@valinux.com> * Keith Whitwell <keith@tungstengraphics.com>
*/
/**
 * DOC: memory domains
 *
 * %AMDGPU_GEM_DOMAIN_CPU	System memory that is not GPU accessible.
 * Memory in this pool could be swapped out to disk if there is pressure.
 *
 * %AMDGPU_GEM_DOMAIN_GTT	GPU accessible system memory, mapped into the
 * GPU's virtual address space via gart. Gart memory linearizes non-contiguous
 * pages of system memory, allows GPU access system memory in a linearized
 * fashion.
 *
 * %AMDGPU_GEM_DOMAIN_VRAM	Local video memory. For APUs, it is memory
 * carved out by the BIOS.
 *
 * %AMDGPU_GEM_DOMAIN_GDS	Global on-chip data storage used to share data
 * across shader threads.
 *
 * %AMDGPU_GEM_DOMAIN_GWS	Global wave sync, used to synchronize the
 * execution of all the waves on a device.
 *
 * %AMDGPU_GEM_DOMAIN_OA	Ordered append, used by 3D or Compute engines
 * for appending data.
 *
 * %AMDGPU_GEM_DOMAIN_DOORBELL	Doorbell. It is an MMIO region for
 * signalling user mode queues.
 */
#define AMDGPU_GEM_DOMAIN_CPU		0x1
#define AMDGPU_GEM_DOMAIN_GTT		0x2
#define AMDGPU_GEM_DOMAIN_VRAM		0x4
#define AMDGPU_GEM_DOMAIN_GDS		0x8
#define AMDGPU_GEM_DOMAIN_GWS		0x10
#define AMDGPU_GEM_DOMAIN_OA		0x20
#define AMDGPU_GEM_DOMAIN_DOORBELL	0x40
/* Union of all valid memory domain bits above. */
#define AMDGPU_GEM_DOMAIN_MASK		(AMDGPU_GEM_DOMAIN_CPU | \
					 AMDGPU_GEM_DOMAIN_GTT | \
					 AMDGPU_GEM_DOMAIN_VRAM | \
					 AMDGPU_GEM_DOMAIN_GDS | \
					 AMDGPU_GEM_DOMAIN_GWS | \
					 AMDGPU_GEM_DOMAIN_OA | \
					 AMDGPU_GEM_DOMAIN_DOORBELL)
/* Flag that CPU access will be required for the case of VRAM domain */
#define AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED	(1 << 0)
/* Flag that CPU access will not work, this VRAM domain is invisible */
#define AMDGPU_GEM_CREATE_NO_CPU_ACCESS		(1 << 1)
/* Flag that USWC attributes should be used for GTT */
#define AMDGPU_GEM_CREATE_CPU_GTT_USWC		(1 << 2)
/* Flag that the memory should be in VRAM and cleared */
#define AMDGPU_GEM_CREATE_VRAM_CLEARED		(1 << 3)
/* NOTE(review): bit 4 is not defined in this view of the header. */
/* Flag that allocating the BO should use linear VRAM */
#define AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS	(1 << 5)
/* Flag that BO is always valid in this VM */
#define AMDGPU_GEM_CREATE_VM_ALWAYS_VALID	(1 << 6)
/* Flag that BO sharing will be explicitly synchronized */
#define AMDGPU_GEM_CREATE_EXPLICIT_SYNC		(1 << 7)
/* Flag that indicates allocating MQD gart on GFX9, where the mtype
 * for the second page onward should be set to NC. It should never
 * be used by user space applications.
 */
#define AMDGPU_GEM_CREATE_CP_MQD_GFX9		(1 << 8)
/* Flag that BO may contain sensitive data that must be wiped before
 * releasing the memory
 */
#define AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE	(1 << 9)
/* Flag that BO will be encrypted and that the TMZ bit should be
 * set in the PTEs when mapping this buffer via GPUVM or
 * accessing it with various hw blocks
 */
#define AMDGPU_GEM_CREATE_ENCRYPTED		(1 << 10)
/* Flag that BO will be used only in preemptible context, which does
 * not require GTT memory accounting
 */
#define AMDGPU_GEM_CREATE_PREEMPTIBLE		(1 << 11)
/* Flag that BO can be discarded under memory pressure without keeping the
 * content.
 */
#define AMDGPU_GEM_CREATE_DISCARDABLE		(1 << 12)
/* Flag that BO is shared coherently between multiple devices or CPU threads.
 * May depend on GPU instructions to flush caches to system scope explicitly.
 *
 * This influences the choice of MTYPE in the PTEs on GFXv9 and later GPUs and
 * may override the MTYPE selected in AMDGPU_VA_OP_MAP.
 */
#define AMDGPU_GEM_CREATE_COHERENT		(1 << 13)
/* Flag that BO should not be cached by GPU. Coherent without having to flush
 * GPU caches explicitly
 *
 * This influences the choice of MTYPE in the PTEs on GFXv9 and later GPUs and
 * may override the MTYPE selected in AMDGPU_VA_OP_MAP.
 */
#define AMDGPU_GEM_CREATE_UNCACHED		(1 << 14)
/* Flag that BO should be coherent across devices when using device-level
 * atomics. May depend on GPU instructions to flush caches to device scope
 * explicitly, promoting them to system scope automatically.
 *
 * This influences the choice of MTYPE in the PTEs on GFXv9 and later GPUs and
 * may override the MTYPE selected in AMDGPU_VA_OP_MAP.
 */
#define AMDGPU_GEM_CREATE_EXT_COHERENT		(1 << 15)
/* Set PTE.D and recompress during GTT->VRAM moves according to TILING flags. */
#define AMDGPU_GEM_CREATE_GFX12_DCC		(1 << 16)
/* Input arguments for the GEM create ioctl. */
struct drm_amdgpu_gem_create_in {
	/** the requested memory size */
	__u64 bo_size;
	/** physical start_addr alignment in bytes for some HW requirements */
	__u64 alignment;
	/** the requested memory domains (AMDGPU_GEM_DOMAIN_*) */
	__u64 domains;
	/** allocation flags (AMDGPU_GEM_CREATE_*) */
	__u64 domain_flags;
};

/* In/out container for the GEM create ioctl. */
union drm_amdgpu_gem_create {
	struct drm_amdgpu_gem_create_in in;
	struct drm_amdgpu_gem_create_out out;
};
/** Opcode to create new residency list. */
#define AMDGPU_BO_LIST_OP_CREATE	0
/** Opcode to destroy previously created residency list */
#define AMDGPU_BO_LIST_OP_DESTROY	1
/** Opcode to update resource information in the list */
#define AMDGPU_BO_LIST_OP_UPDATE	2

/* Input arguments for the BO list ioctl. */
struct drm_amdgpu_bo_list_in {
	/** Type of operation (AMDGPU_BO_LIST_OP_*) */
	__u32 operation;
	/** Handle of list or 0 if we want to create one */
	__u32 list_handle;
	/** Number of BOs in list */
	__u32 bo_number;
	/** Size of each element describing BO */
	__u32 bo_info_size;
	/** Pointer to array describing BOs */
	__u64 bo_info_ptr;
};

/* One element of the array pointed to by bo_info_ptr. */
struct drm_amdgpu_bo_list_entry {
	/** Handle of BO */
	__u32 bo_handle;
	/** New (if specified) BO priority to be used during migration */
	__u32 bo_priority;
};

/* Output of the BO list ioctl. */
struct drm_amdgpu_bo_list_out {
	/** Handle of resource list */
	__u32 list_handle;
	__u32 _pad;
};

/* In/out container for the BO list ioctl. */
union drm_amdgpu_bo_list {
	struct drm_amdgpu_bo_list_in in;
	struct drm_amdgpu_bo_list_out out;
};
/* GPU reset status */
#define AMDGPU_CTX_NO_RESET		0
/* this context caused it */
#define AMDGPU_CTX_GUILTY_RESET		1
/* some other context caused it */
#define AMDGPU_CTX_INNOCENT_RESET	2
/* unknown cause */
#define AMDGPU_CTX_UNKNOWN_RESET	3

/* indicate gpu reset occurred after ctx created */
#define AMDGPU_CTX_QUERY2_FLAGS_RESET		(1<<0)
/* indicate vram lost occurred after ctx created */
#define AMDGPU_CTX_QUERY2_FLAGS_VRAMLOST	(1<<1)
/* indicate some job from this context once cause gpu hang */
#define AMDGPU_CTX_QUERY2_FLAGS_GUILTY		(1<<2)
/* indicate some errors are detected by RAS */
#define AMDGPU_CTX_QUERY2_FLAGS_RAS_CE		(1<<3)
#define AMDGPU_CTX_QUERY2_FLAGS_RAS_UE		(1<<4)
/* indicate that the reset hasn't completed yet */
#define AMDGPU_CTX_QUERY2_FLAGS_RESET_IN_PROGRESS	(1<<5)

/* Context priority level */
#define AMDGPU_CTX_PRIORITY_UNSET	-2048
#define AMDGPU_CTX_PRIORITY_VERY_LOW	-1023
#define AMDGPU_CTX_PRIORITY_LOW		-512
#define AMDGPU_CTX_PRIORITY_NORMAL	0
/*
 * When used in struct drm_amdgpu_ctx_in, a priority above NORMAL requires
 * CAP_SYS_NICE or DRM_MASTER
 */
#define AMDGPU_CTX_PRIORITY_HIGH	512
#define AMDGPU_CTX_PRIORITY_VERY_HIGH	1023
struct { /** For future use, no flags defined so far */
__u64 flags; /** Number of resets caused by this context so far. */
__u32 hangs; /** Reset status since the last call of the ioctl. */
__u32 reset_status;
} state;
struct {
__u32 flags;
__u32 _pad;
} pstate;
};
/* In/out container for the context ioctl. */
union drm_amdgpu_ctx {
	struct drm_amdgpu_ctx_in in;
	union drm_amdgpu_ctx_out out;
};
/* queue priority levels */ /* low < normal low < normal high < high */ #define AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_MASK 0x3 #define AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_SHIFT 0 #define AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_NORMAL_LOW 0 #define AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_LOW 1 #define AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_NORMAL_HIGH 2 #define AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_HIGH 3 /* admin only */ /* for queues that need access to protected content */ #define AMDGPU_USERQ_CREATE_FLAGS_QUEUE_SECURE (1 << 2)
/*
 * This structure is a container to pass input configuration
 * info for all supported userqueue related operations.
 * For operation AMDGPU_USERQ_OP_CREATE: user is expected
 * to set all fields, except the parameter 'queue_id'.
 * For operation AMDGPU_USERQ_OP_FREE: the only input parameter expected
 * to be set is 'queue_id', everything else is ignored.
 */
struct drm_amdgpu_userq_in {
	/** AMDGPU_USERQ_OP_* */
	__u32 op;
	/** Queue id passed for operation USERQ_OP_FREE */
	__u32 queue_id;
	/** the target GPU engine to execute workload (AMDGPU_HW_IP_*) */
	__u32 ip_type;
	/**
	 * @doorbell_handle: the handle of doorbell GEM object
	 * associated with this userqueue client.
	 */
	__u32 doorbell_handle;
	/**
	 * @doorbell_offset: 32-bit offset of the doorbell in the doorbell bo.
	 * Kernel will generate absolute doorbell offset using doorbell_handle
	 * and doorbell_offset in the doorbell bo.
	 */
	__u32 doorbell_offset;
	/**
	 * @flags: flags used for queue parameters
	 */
	__u32 flags;
	/**
	 * @queue_va: Virtual address of the GPU memory which holds the queue
	 * object. The queue holds the workload packets.
	 */
	__u64 queue_va;
	/**
	 * @queue_size: Size of the queue in bytes, this needs to be 256-byte
	 * aligned.
	 */
	__u64 queue_size;
	/**
	 * @rptr_va : Virtual address of the GPU memory which holds the ring RPTR.
	 * This object must be at least 8 byte in size and aligned to 8-byte offset.
	 */
	__u64 rptr_va;
	/**
	 * @wptr_va : Virtual address of the GPU memory which holds the ring WPTR.
	 * This object must be at least 8 byte in size and aligned to 8-byte offset.
	 *
	 * Queue, RPTR and WPTR can come from the same object, as long as the size
	 * and alignment related requirements are met.
	 */
	__u64 wptr_va;
	/**
	 * @mqd: MQD (memory queue descriptor) is a set of parameters which allow
	 * the GPU to uniquely define and identify a usermode queue.
	 *
	 * MQD data can be of different size for different GPU IP/engine and
	 * their respective versions/revisions, so this points to a __u64 *
	 * which holds IP specific MQD of this usermode queue.
	 */
	__u64 mqd;
	/**
	 * @mqd_size: size of MQD data in bytes, it must match the MQD structure
	 * size of the respective engine/revision defined in UAPI for ex, for
	 * gfx11 workloads, size = sizeof(drm_amdgpu_userq_mqd_gfx11).
	 */
	__u64 mqd_size;
};
/* The structure to carry output of userqueue ops */
struct drm_amdgpu_userq_out {
	/**
	 * For operation AMDGPU_USERQ_OP_CREATE: This field contains a unique
	 * queue ID to represent the newly created userqueue in the system, otherwise
	 * it should be ignored.
	 */
	__u32 queue_id;
	__u32 _pad;
};

/* In/out container for the userqueue ioctl. */
union drm_amdgpu_userq {
	struct drm_amdgpu_userq_in in;
	struct drm_amdgpu_userq_out out;
};
/* GFX V11 IP specific MQD parameters */
struct drm_amdgpu_userq_mqd_gfx11 {
	/**
	 * @shadow_va: Virtual address of the GPU memory to hold the shadow buffer.
	 * Use AMDGPU_INFO_IOCTL to find the exact size of the object.
	 */
	__u64 shadow_va;
	/**
	 * @csa_va: Virtual address of the GPU memory to hold the CSA buffer.
	 * Use AMDGPU_INFO_IOCTL to find the exact size of the object.
	 */
	__u64 csa_va;
};

/* GFX V11 SDMA IP specific MQD parameters */
struct drm_amdgpu_userq_mqd_sdma_gfx11 {
	/**
	 * @csa_va: Virtual address of the GPU memory to hold the CSA buffer.
	 * This must be from a separate GPU object, and use AMDGPU_INFO IOCTL
	 * to get the size.
	 */
	__u64 csa_va;
};

/* GFX V11 Compute IP specific MQD parameters */
struct drm_amdgpu_userq_mqd_compute_gfx11 {
	/**
	 * @eop_va: Virtual address of the GPU memory to hold the EOP buffer.
	 * This must be from a separate GPU object, and use AMDGPU_INFO IOCTL
	 * to get the size.
	 */
	__u64 eop_va;
};
/* userq signal/wait ioctl */
struct drm_amdgpu_userq_signal {
	/**
	 * @queue_id: Queue handle used by the userq fence creation function
	 * to retrieve the WPTR.
	 */
	__u32 queue_id;
	__u32 pad;
	/**
	 * @syncobj_handles: The list of syncobj handles submitted by the user queue
	 * job to be signaled.
	 */
	__u64 syncobj_handles;
	/**
	 * @num_syncobj_handles: A count that represents the number of syncobj handles in
	 * @syncobj_handles.
	 */
	__u64 num_syncobj_handles;
	/**
	 * @bo_read_handles: The list of BO handles that the submitted user queue job
	 * is using for read only. This will update BO fences in the kernel.
	 */
	__u64 bo_read_handles;
	/**
	 * @bo_write_handles: The list of BO handles that the submitted user queue job
	 * is using for write only. This will update BO fences in the kernel.
	 */
	__u64 bo_write_handles;
	/**
	 * @num_bo_read_handles: A count that represents the number of read BO handles in
	 * @bo_read_handles.
	 */
	__u32 num_bo_read_handles;
	/**
	 * @num_bo_write_handles: A count that represents the number of write BO handles in
	 * @bo_write_handles.
	 */
	__u32 num_bo_write_handles;
};
/* One address/value fence pair returned by the userq wait ioctl. */
struct drm_amdgpu_userq_fence_info {
	/**
	 * @va: A gpu address allocated for each queue which stores the
	 * read pointer (RPTR) value.
	 */
	__u64 va;
	/**
	 * @value: A 64 bit value represents the write pointer (WPTR) of the
	 * queue commands which compared with the RPTR value to signal the
	 * fences.
	 */
	__u64 value;
};
/* Arguments for the userq wait ioctl. */
struct drm_amdgpu_userq_wait {
	/**
	 * @waitq_id: Queue handle used by the userq wait IOCTL to retrieve the
	 * wait queue and maintain the fence driver references in it.
	 */
	__u32 waitq_id;
	__u32 pad;
	/**
	 * @syncobj_handles: The list of syncobj handles submitted by the user queue
	 * job to get the va/value pairs.
	 */
	__u64 syncobj_handles;
	/**
	 * @syncobj_timeline_handles: The list of timeline syncobj handles submitted by
	 * the user queue job to get the va/value pairs at given @syncobj_timeline_points.
	 */
	__u64 syncobj_timeline_handles;
	/**
	 * @syncobj_timeline_points: The list of timeline syncobj points submitted by the
	 * user queue job for the corresponding @syncobj_timeline_handles.
	 */
	__u64 syncobj_timeline_points;
	/**
	 * @bo_read_handles: The list of read BO handles submitted by the user queue
	 * job to get the va/value pairs.
	 */
	__u64 bo_read_handles;
	/**
	 * @bo_write_handles: The list of write BO handles submitted by the user queue
	 * job to get the va/value pairs.
	 */
	__u64 bo_write_handles;
	/**
	 * @num_syncobj_timeline_handles: A count that represents the number of timeline
	 * syncobj handles in @syncobj_timeline_handles.
	 */
	__u16 num_syncobj_timeline_handles;
	/**
	 * @num_fences: This field can be used both as input and output. As input it defines
	 * the maximum number of fences that can be returned and as output it will specify
	 * how many fences were actually returned from the ioctl.
	 */
	__u16 num_fences;
	/**
	 * @num_syncobj_handles: A count that represents the number of syncobj handles in
	 * @syncobj_handles.
	 */
	__u32 num_syncobj_handles;
	/**
	 * @num_bo_read_handles: A count that represents the number of read BO handles in
	 * @bo_read_handles.
	 */
	__u32 num_bo_read_handles;
	/**
	 * @num_bo_write_handles: A count that represents the number of write BO handles in
	 * @bo_write_handles.
	 */
	__u32 num_bo_write_handles;
	/**
	 * @out_fences: The field is a return value from the ioctl containing the list of
	 * address/value pairs to wait for.
	 */
	__u64 out_fences;
};
/* vm ioctl */
#define AMDGPU_VM_OP_RESERVE_VMID	1
#define AMDGPU_VM_OP_UNRESERVE_VMID	2

/* Input container for the scheduler ioctl (input only, no output). */
union drm_amdgpu_sched {
	struct drm_amdgpu_sched_in in;
};

/*
 * This is not a reliable API and you should expect it to fail for any
 * number of reasons and have fallback path that do not use userptr to
 * perform any operation.
 */
#define AMDGPU_GEM_USERPTR_READONLY	(1 << 0)
#define AMDGPU_GEM_USERPTR_ANONONLY	(1 << 1)
#define AMDGPU_GEM_USERPTR_VALIDATE	(1 << 2)
#define AMDGPU_GEM_USERPTR_REGISTER	(1 << 3)
/* GFX12 and later: */
#define AMDGPU_TILING_GFX12_SWIZZLE_MODE_SHIFT			0
#define AMDGPU_TILING_GFX12_SWIZZLE_MODE_MASK			0x7
/* These are DCC recompression settings for memory management: */
#define AMDGPU_TILING_GFX12_DCC_MAX_COMPRESSED_BLOCK_SHIFT	3
#define AMDGPU_TILING_GFX12_DCC_MAX_COMPRESSED_BLOCK_MASK	0x3 /* 0:64B, 1:128B, 2:256B */
#define AMDGPU_TILING_GFX12_DCC_NUMBER_TYPE_SHIFT		5
#define AMDGPU_TILING_GFX12_DCC_NUMBER_TYPE_MASK		0x7 /* CB_COLOR0_INFO.NUMBER_TYPE */
#define AMDGPU_TILING_GFX12_DCC_DATA_FORMAT_SHIFT		8
#define AMDGPU_TILING_GFX12_DCC_DATA_FORMAT_MASK		0x3f /* [0:4]:CB_COLOR0_INFO.FORMAT, [5]:MM */
/* When clearing the buffer or moving it from VRAM to GTT, don't compress and set DCC metadata
 * to uncompressed. Set when parts of an allocation bypass DCC and read raw data.
 */
#define AMDGPU_TILING_GFX12_DCC_WRITE_COMPRESS_DISABLE_SHIFT	14
#define AMDGPU_TILING_GFX12_DCC_WRITE_COMPRESS_DISABLE_MASK	0x1
/* bit gap */
#define AMDGPU_TILING_GFX12_SCANOUT_SHIFT			63
#define AMDGPU_TILING_GFX12_SCANOUT_MASK			0x1
/** The same structure is shared for input/output */
struct drm_amdgpu_gem_metadata {
	/** GEM Object handle */
	__u32 handle;
	/** Do we want get or set metadata */
	__u32 op;
	struct {
		/** For future use, no flags defined so far */
		__u64 flags;
		/** family specific tiling info */
		__u64 tiling_info;
		/** number of valid bytes in data[] below */
		__u32 data_size_bytes;
		__u32 data[64];
	} data;
};
/* Output of the GEM mmap ioctl. */
struct drm_amdgpu_gem_mmap_out {
	/** mmap offset from the vma offset manager */
	__u64 addr_ptr;
};

/* In/out container for the GEM mmap ioctl. */
union drm_amdgpu_gem_mmap {
	struct drm_amdgpu_gem_mmap_in in;
	struct drm_amdgpu_gem_mmap_out out;
};
/* Input arguments for the GEM wait-idle ioctl. */
struct drm_amdgpu_gem_wait_idle_in {
	/** GEM object handle */
	__u32 handle;
	/** For future use, no flags defined so far */
	__u32 flags;
	/** Absolute timeout to wait */
	__u64 timeout;
};

/* Output of the GEM wait-idle ioctl. */
struct drm_amdgpu_gem_wait_idle_out {
	/** BO status:  0 - BO is idle, 1 - BO is busy */
	__u32 status;
	/** Returned current memory domain */
	__u32 domain;
};

/* In/out container for the GEM wait-idle ioctl. */
union drm_amdgpu_gem_wait_idle {
	struct drm_amdgpu_gem_wait_idle_in in;
	struct drm_amdgpu_gem_wait_idle_out out;
};
/* Input arguments for the wait-CS ioctl. */
struct drm_amdgpu_wait_cs_in {
	/* Command submission handle
	 * handle equals 0 means none to wait for
	 * handle equals ~0ull means wait for the latest sequence number
	 */
	__u64 handle;
	/** Absolute timeout to wait */
	__u64 timeout;
	/** The target GPU engine type (AMDGPU_HW_IP_*) */
	__u32 ip_type;
	/** Index of the IP if there are more IPs of the same type */
	__u32 ip_instance;
	/** Ring index of the HW IP */
	__u32 ring;
	/** Rendering context id */
	__u32 ctx_id;
};
/* Sets or returns a value associated with a buffer. */
struct drm_amdgpu_gem_op {
	/** GEM object handle */
	__u32 handle;
	/** AMDGPU_GEM_OP_* */
	__u32 op;
	/** Input or return value */
	__u64 value;
};
/* Delay the page table update till the next CS */
#define AMDGPU_VM_DELAY_UPDATE		(1 << 0)

/* Mapping flags */
/* readable mapping */
#define AMDGPU_VM_PAGE_READABLE		(1 << 1)
/* writable mapping */
#define AMDGPU_VM_PAGE_WRITEABLE	(1 << 2)
/* executable mapping, new for VI */
#define AMDGPU_VM_PAGE_EXECUTABLE	(1 << 3)
/* partially resident texture */
#define AMDGPU_VM_PAGE_PRT		(1 << 4)
/* MTYPE flags use bit 5 to 8 */
#define AMDGPU_VM_MTYPE_MASK		(0xf << 5)
/* Default MTYPE. Pre-AI must use this. Recommended for newer ASICs. */
#define AMDGPU_VM_MTYPE_DEFAULT		(0 << 5)
/* Use Non Coherent MTYPE instead of default MTYPE */
#define AMDGPU_VM_MTYPE_NC		(1 << 5)
/* Use Write Combine MTYPE instead of default MTYPE */
#define AMDGPU_VM_MTYPE_WC		(2 << 5)
/* Use Cache Coherent MTYPE instead of default MTYPE */
#define AMDGPU_VM_MTYPE_CC		(3 << 5)
/* Use UnCached MTYPE instead of default MTYPE */
#define AMDGPU_VM_MTYPE_UC		(4 << 5)
/* Use Read Write MTYPE instead of default MTYPE */
#define AMDGPU_VM_MTYPE_RW		(5 << 5)
/* don't allocate MALL */
#define AMDGPU_VM_PAGE_NOALLOC		(1 << 9)
/* Arguments for the GEM VA ioctl (map/unmap BOs into the GPU VM). */
struct drm_amdgpu_gem_va {
	/** GEM object handle */
	__u32 handle;
	__u32 _pad;
	/** AMDGPU_VA_OP_* */
	__u32 operation;
	/** AMDGPU_VM_PAGE_* */
	__u32 flags;
	/** va address to assign . Must be correctly aligned.*/
	__u64 va_address;
	/** Specify offset inside of BO to assign. Must be correctly aligned.*/
	__u64 offset_in_bo;
	/** Specify mapping size. Must be correctly aligned. */
	__u64 map_size;
	/**
	 * vm_timeline_point is a sequence number used to add new timeline point.
	 */
	__u64 vm_timeline_point;
	/**
	 * The vm page table update fence is installed in given vm_timeline_syncobj_out
	 * at vm_timeline_point.
	 */
	__u32 vm_timeline_syncobj_out;
	/** the number of syncobj handles in @input_fence_syncobj_handles */
	__u32 num_syncobj_handles;
	/** Array of sync object handle to wait for given input fences */
	__u64 input_fence_syncobj_handles;
};
/* HW IP (engine) types addressable through the CS/userq interfaces. */
#define AMDGPU_HW_IP_GFX	0
#define AMDGPU_HW_IP_COMPUTE	1
#define AMDGPU_HW_IP_DMA	2
#define AMDGPU_HW_IP_UVD	3
#define AMDGPU_HW_IP_VCE	4
#define AMDGPU_HW_IP_UVD_ENC	5
#define AMDGPU_HW_IP_VCN_DEC	6
/*
 * From VCN4, AMDGPU_HW_IP_VCN_ENC is re-used to support
 * both encoding and decoding jobs.
 */
#define AMDGPU_HW_IP_VCN_ENC	7
#define AMDGPU_HW_IP_VCN_JPEG	8
#define AMDGPU_HW_IP_VPE	9
#define AMDGPU_HW_IP_NUM	10
/* Input arguments for the command submission (CS) ioctl. */
struct drm_amdgpu_cs_in {
	/** Rendering context id */
	__u32 ctx_id;
	/** Handle of resource list associated with CS */
	__u32 bo_list_handle;
	/** Number of chunks pointed to by 'chunks' */
	__u32 num_chunks;
	__u32 flags;
	/** this points to __u64 * which point to cs chunks */
	__u64 chunks;
};

/* Output of the CS ioctl. */
struct drm_amdgpu_cs_out {
	/** Sequence number of the submission, used for fences/waits */
	__u64 handle;
};

/* In/out container for the CS ioctl. */
union drm_amdgpu_cs {
	struct drm_amdgpu_cs_in in;
	struct drm_amdgpu_cs_out out;
};
/* Specify flags to be used for IB */

/* This IB should be submitted to CE */
#define AMDGPU_IB_FLAG_CE	(1<<0)

/* Preamble flag, which means the IB could be dropped if no context switch */
#define AMDGPU_IB_FLAG_PREAMBLE (1<<1)

/* Preempt flag, IB should set Pre_enb bit if PREEMPT flag detected */
#define AMDGPU_IB_FLAG_PREEMPT (1<<2)

/* The IB fence should do the L2 writeback but not invalidate any shader
 * caches (L2/vL1/sL1/I$).
 */
#define AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE (1 << 3)

/* Set GDS_COMPUTE_MAX_WAVE_ID = DEFAULT before PACKET3_INDIRECT_BUFFER.
 * This will reset wave ID counters for the IB.
 */
#define AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID (1 << 4)

/* Flag the IB as secure (TMZ)
 */
#define AMDGPU_IB_FLAGS_SECURE  (1 << 5)

/* Tell KMD to flush and invalidate caches
 */
#define AMDGPU_IB_FLAG_EMIT_MEM_SYNC  (1 << 6)
/* CS chunk describing one indirect buffer (IB) to execute. */
struct drm_amdgpu_cs_chunk_ib {
	__u32 _pad;
	/** AMDGPU_IB_FLAG_* */
	__u32 flags;
	/** Virtual address to begin IB execution */
	__u64 va_start;
	/** Size of submission */
	__u32 ib_bytes;
	/** HW IP to submit to */
	__u32 ip_type;
	/** HW IP index of the same type to submit to */
	__u32 ip_instance;
	/** Ring index to submit to */
	__u32 ring;
};
/* Firmware version query arguments, used by the INFO ioctl. */
struct drm_amdgpu_query_fw {
	/** AMDGPU_INFO_FW_* */
	__u32 fw_type;
	/**
	 * Index of the IP if there are more IPs of
	 * the same type.
	 */
	__u32 ip_instance;
	/**
	 * Index of the engine. Whether this is used depends
	 * on the firmware type. (e.g. MEC, SDMA)
	 */
	__u32 index;
	__u32 _pad;
};
/* Input structure for the INFO ioctl */
struct drm_amdgpu_info {
	/* Where the return value will be stored */
	__u64 return_pointer;
	/* The size of the return value. Just like "size" in "snprintf",
	 * it limits how many bytes the kernel can write.
	 */
	__u32 return_size;
	/* The query request id. */
	__u32 query;

	union {
		struct {
			__u32 id;
			__u32 _pad;
		} mode_crtc;

		struct {
			/** AMDGPU_HW_IP_* */
			__u32 type;
			/**
			 * Index of the IP if there are more IPs of the same
			 * type. Ignored by AMDGPU_INFO_HW_IP_COUNT.
			 */
			__u32 ip_instance;
		} query_hw_ip;

		struct {
			__u32 dword_offset;
			/** number of registers to read */
			__u32 count;
			__u32 instance;
			/** For future use, no flags defined so far */
			__u32 flags;
		} read_mmr_reg;

		struct drm_amdgpu_query_fw query_fw;

		struct {
			__u32 type;
			__u32 offset;
		} vbios_info;

		struct {
			__u32 type;
		} sensor_info;

		struct {
			__u32 type;
		} video_cap;
	};
};
/* GDS/GWS/OA partition sizes returned by the INFO ioctl. */
struct drm_amdgpu_info_gds {
	/** GDS GFX partition size */
	__u32 gds_gfx_partition_size;
	/** GDS compute partition size */
	__u32 compute_partition_size;
	/** total GDS memory size */
	__u32 gds_total_size;
	/** GWS size per GFX partition */
	__u32 gws_per_gfx_partition;
	/** GWS size per compute partition */
	__u32 gws_per_compute_partition;
	/** OA size per GFX partition */
	__u32 oa_per_gfx_partition;
	/** OA size per compute partition */
	__u32 oa_per_compute_partition;
	__u32 _pad;
};
/** Theoretical max. available memory in the given heap */
__u64 usable_heap_size;
/** * Number of bytes allocated in the heap. This includes all processes * and private allocations in the kernel. It changes when new buffers * are allocated, freed, and moved. It cannot be larger than * heap_size.
*/
__u64 heap_usage;
/** * Theoretical possible max. size of buffer which * could be allocated in the given heap
*/
__u64 max_allocation;
};
/* Device information returned by the INFO ioctl. */
struct drm_amdgpu_info_device {
	/** PCI Device ID */
	__u32 device_id;
	/** Internal chip revision: A0, A1, etc. */
	__u32 chip_rev;
	__u32 external_rev;
	/** Revision id in PCI Config space */
	__u32 pci_rev;
	__u32 family;
	__u32 num_shader_engines;
	__u32 num_shader_arrays_per_engine;
	/* in KHz */
	__u32 gpu_counter_freq;
	__u64 max_engine_clock;
	__u64 max_memory_clock;
	/* cu information */
	__u32 cu_active_number;
	/* NOTE: cu_ao_mask is INVALID, DON'T use it */
	__u32 cu_ao_mask;
	__u32 cu_bitmap[4][4];
	/** Render backend pipe mask. One render backend is CB+DB. */
	__u32 enabled_rb_pipes_mask;
	__u32 num_rb_pipes;
	__u32 num_hw_gfx_contexts;
	/* PCIe version (the smaller of the GPU and the CPU/motherboard) */
	__u32 pcie_gen;
	__u64 ids_flags;
	/** Starting virtual address for UMDs. */
	__u64 virtual_address_offset;
	/** The maximum virtual address */
	__u64 virtual_address_max;
	/** Required alignment of virtual addresses. */
	__u32 virtual_address_alignment;
	/** Page table entry - fragment size */
	__u32 pte_fragment_size;
	__u32 gart_page_size;
	/** constant engine ram size*/
	__u32 ce_ram_size;
	/** video memory type info*/
	__u32 vram_type;
	/** video memory bit width*/
	__u32 vram_bit_width;
	/* vce harvesting instance */
	__u32 vce_harvest_config;
	/* gfx double offchip LDS buffers */
	__u32 gc_double_offchip_lds_buf;
	/* NGG Primitive Buffer */
	__u64 prim_buf_gpu_addr;
	/* NGG Position Buffer */
	__u64 pos_buf_gpu_addr;
	/* NGG Control Sideband */
	__u64 cntl_sb_buf_gpu_addr;
	/* NGG Parameter Cache */
	__u64 param_buf_gpu_addr;
	__u32 prim_buf_size;
	__u32 pos_buf_size;
	__u32 cntl_sb_buf_size;
	__u32 param_buf_size;
	/* wavefront size*/
	__u32 wave_front_size;
	/* shader visible vgprs*/
	__u32 num_shader_visible_vgprs;
	/* CU per shader array*/
	__u32 num_cu_per_sh;
	/* number of tcc blocks*/
	__u32 num_tcc_blocks;
	/* gs vgt table depth*/
	__u32 gs_vgt_table_depth;
	/* gs primitive buffer depth*/
	__u32 gs_prim_buffer_depth;
	/* max gs wavefront per vgt*/
	__u32 max_gs_waves_per_vgt;
	/* PCIe number of lanes (the smaller of the GPU and the CPU/motherboard) */
	__u32 pcie_num_lanes;
	/* always on cu bitmap */
	__u32 cu_ao_bitmap[4][4];
	/** Starting high virtual address for UMDs. */
	__u64 high_va_offset;
	/** The maximum high virtual address */
	__u64 high_va_max;
	/* gfx10 pa_sc_tile_steering_override */
	__u32 pa_sc_tile_steering_override;
	/* disabled TCCs */
	__u64 tcc_disabled_mask;
	__u64 min_engine_clock;
	__u64 min_memory_clock;
	/* The following fields are only set on gfx11+, older chips set 0. */
	__u32 tcp_cache_size;       /* AKA GL0, VMEM cache */
	__u32 num_sqc_per_wgp;
	__u32 sqc_data_cache_size;  /* AKA SMEM cache */
	__u32 sqc_inst_cache_size;
	__u32 gl1c_cache_size;
	__u32 gl2c_cache_size;
	__u64 mall_size;            /* AKA infinity cache */
	/* high 32 bits of the rb pipes mask */
	__u32 enabled_rb_pipes_mask_hi;
	/* shadow area size for gfx11 */
	__u32 shadow_size;
	/* shadow area base virtual alignment for gfx11 */
	__u32 shadow_alignment;
	/* context save area size for gfx11 */
	__u32 csa_size;
	/* context save area base virtual alignment for gfx11 */
	__u32 csa_alignment;
	/* Userq IP mask (1 << AMDGPU_HW_IP_*) */
	__u32 userq_ip_mask;
	__u32 pad;
};
/* Per-HW-IP information returned by the INFO ioctl. */
struct drm_amdgpu_info_hw_ip {
	/** Version of h/w IP */
	__u32  hw_ip_version_major;
	__u32  hw_ip_version_minor;
	/** Capabilities */
	__u64  capabilities_flags;
	/** command buffer address start alignment*/
	__u32  ib_start_alignment;
	/** command buffer size alignment*/
	__u32  ib_size_alignment;
	/** Bitmask of available rings. Bit 0 means ring 0, etc. */
	__u32  available_rings;
	/** version info: bits 23:16 major, 15:8 minor, 7:0 revision */
	__u32  ip_discovery_version;
	/* Userq available slots */
	__u32  userq_num_slots;
};
/* UVD session handle usage returned by the INFO ioctl. */
struct drm_amdgpu_info_num_handles {
	/** Max handles as supported by firmware for UVD */
	__u32  uvd_max_handles;
	/** Handles currently in use for UVD */
	__u32  uvd_used_handles;
};
/* Userqueue metadata sizes/alignments for the GFX IP. */
struct drm_amdgpu_info_uq_metadata_gfx {
	/* shadow area size for gfx11 */
	__u32 shadow_size;
	/* shadow area base virtual alignment for gfx11 */
	__u32 shadow_alignment;
	/* context save area size for gfx11 */
	__u32 csa_size;
	/* context save area base virtual alignment for gfx11 */
	__u32 csa_alignment;
};

/* Per-IP userqueue metadata container returned by the INFO ioctl. */
struct drm_amdgpu_info_uq_metadata {
	union {
		struct drm_amdgpu_info_uq_metadata_gfx gfx;
	};
};
/*
 * NOTE(review): the following German text appears to be extraction residue
 * from a code-viewer web page, not part of the original header. It is kept
 * here inside a comment so the file remains valid C. English translation:
 * "The information on this web page was carefully compiled to the best of
 * our knowledge. However, neither completeness, nor correctness, nor quality
 * of the provided information is guaranteed. Remark: the colored syntax
 * highlighting and the measurement are still experimental."
 *
 * Original text:
 * Die Informationen auf dieser Webseite wurden
 * nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
 * noch Qualität der bereit gestellten Informationen zugesichert.
 * Bemerkung:
 * Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.
 */