/*
* Copyright 2023 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#include <linux/delay.h>
#include <linux/kernel.h>
#include <linux/firmware.h>
#include <linux/module.h>
#include <linux/pci.h>
#include "amdgpu.h"
#include "amdgpu_gfx.h"
#include "amdgpu_psp.h"
#include "amdgpu_smu.h"
#include "imu_v12_0.h"
#include "soc24.h"
#include "nvd.h"
#include "gc/gc_12_0_0_offset.h"
#include "gc/gc_12_0_0_sh_mask.h"
#include "soc24_enum.h"
#include "ivsrcid/gfx/irqsrcs_gfx_12_0_0.h"
#include "soc15.h"
#include "clearstate_gfx12.h"
#include "v12_structs.h"
#include "gfx_v12_0.h"
#include "nbif_v6_3_1.h"
#include "mes_v12_0.h"
#include "mes_userqueue.h"
#include "amdgpu_userq_fence.h"
/* Ring count and HPD/EOP buffer size; the size is in bytes per compute ring. */
#define GFX12_NUM_GFX_RINGS 1
#define GFX12_MEC_HPD_SIZE 2048
#define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
/*
 * Default values for gfx queue (CP_GFX_* / CP_RB_*) registers.
 * NOTE(review): presumably the hardware reset values; the users of these
 * macros are outside this part of the file - confirm.
 */
#define regCP_GFX_MQD_CONTROL_DEFAULT 0x00000100
#define regCP_GFX_HQD_VMID_DEFAULT 0x00000000
#define regCP_GFX_HQD_QUEUE_PRIORITY_DEFAULT 0x00000000
#define regCP_GFX_HQD_QUANTUM_DEFAULT 0x00000a01
#define regCP_GFX_HQD_CNTL_DEFAULT 0x00f00000
#define regCP_RB_DOORBELL_CONTROL_DEFAULT 0x00000000
#define regCP_GFX_HQD_RPTR_DEFAULT 0x00000000
/*
 * Default values for compute queue (CP_HQD_* / CP_MQD_*) registers.
 * Each macro is defined exactly once.
 */
#define regCP_HQD_EOP_CONTROL_DEFAULT 0x00000006
#define regCP_HQD_PQ_DOORBELL_CONTROL_DEFAULT 0x00000000
#define regCP_MQD_CONTROL_DEFAULT 0x00000100
#define regCP_HQD_PQ_CONTROL_DEFAULT 0x00308509
#define regCP_HQD_PQ_RPTR_DEFAULT 0x00000000
#define regCP_HQD_PERSISTENT_STATE_DEFAULT 0x0be05501
#define regCP_HQD_IB_CONTROL_DEFAULT 0x00300000
/*
 * Firmware images declared for this module: pfp, me, mec, rlc and toc
 * images for GC 12.0.0 and GC 12.0.1, plus an additional "rlc_kicker"
 * image for GC 12.0.1.
 */
MODULE_FIRMWARE("amdgpu/gc_12_0_0_pfp.bin" );
MODULE_FIRMWARE("amdgpu/gc_12_0_0_me.bin" );
MODULE_FIRMWARE("amdgpu/gc_12_0_0_mec.bin" );
MODULE_FIRMWARE("amdgpu/gc_12_0_0_rlc.bin" );
MODULE_FIRMWARE("amdgpu/gc_12_0_0_toc.bin" );
MODULE_FIRMWARE("amdgpu/gc_12_0_1_pfp.bin" );
MODULE_FIRMWARE("amdgpu/gc_12_0_1_me.bin" );
MODULE_FIRMWARE("amdgpu/gc_12_0_1_mec.bin" );
MODULE_FIRMWARE("amdgpu/gc_12_0_1_rlc.bin" );
MODULE_FIRMWARE("amdgpu/gc_12_0_1_rlc_kicker.bin" );
MODULE_FIRMWARE("amdgpu/gc_12_0_1_toc.bin" );
/*
 * Table of general GC registers (GRBM/CP status, ring buffer and IB
 * pointers, UTCL1 status, VM L2 protection fault, firmware instruction
 * pointers, per-SE GRBM status).
 * NOTE(review): the consumer of this table is outside this part of the
 * file; presumably it is walked by register-dump code - confirm.
 */
static const struct amdgpu_hwip_reg_entry gc_reg_list_12_0[] = {
SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS),
SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS2),
SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS3),
SOC15_REG_ENTRY_STR(GC, 0, regCP_STALLED_STAT1),
SOC15_REG_ENTRY_STR(GC, 0, regCP_STALLED_STAT2),
SOC15_REG_ENTRY_STR(GC, 0, regCP_STALLED_STAT3),
SOC15_REG_ENTRY_STR(GC, 0, regCP_CPC_STALLED_STAT1),
SOC15_REG_ENTRY_STR(GC, 0, regCP_CPF_STALLED_STAT1),
SOC15_REG_ENTRY_STR(GC, 0, regCP_BUSY_STAT),
SOC15_REG_ENTRY_STR(GC, 0, regCP_CPC_BUSY_STAT),
SOC15_REG_ENTRY_STR(GC, 0, regCP_CPF_BUSY_STAT),
SOC15_REG_ENTRY_STR(GC, 0, regCP_CPC_BUSY_STAT2),
SOC15_REG_ENTRY_STR(GC, 0, regCP_CPF_BUSY_STAT2),
SOC15_REG_ENTRY_STR(GC, 0, regCP_CPF_STATUS),
SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_ERROR),
SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HPD_STATUS0),
SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_BASE),
SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_RPTR),
SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_WPTR),
SOC15_REG_ENTRY_STR(GC, 0, regCP_RB0_BASE),
SOC15_REG_ENTRY_STR(GC, 0, regCP_RB0_RPTR),
SOC15_REG_ENTRY_STR(GC, 0, regCP_RB0_WPTR),
SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_CMD_BUFSZ),
SOC15_REG_ENTRY_STR(GC, 0, regCP_IB2_CMD_BUFSZ),
SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BASE_LO),
SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BASE_HI),
SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BUFSZ),
SOC15_REG_ENTRY_STR(GC, 0, regCP_IB2_BASE_LO),
SOC15_REG_ENTRY_STR(GC, 0, regCP_IB2_BASE_HI),
SOC15_REG_ENTRY_STR(GC, 0, regCP_IB2_BUFSZ),
SOC15_REG_ENTRY_STR(GC, 0, regCPF_UTCL1_STATUS),
SOC15_REG_ENTRY_STR(GC, 0, regCPC_UTCL1_STATUS),
SOC15_REG_ENTRY_STR(GC, 0, regCPG_UTCL1_STATUS),
SOC15_REG_ENTRY_STR(GC, 0, regIA_UTCL1_STATUS),
SOC15_REG_ENTRY_STR(GC, 0, regIA_UTCL1_STATUS_2),
SOC15_REG_ENTRY_STR(GC, 0, regPA_CL_CNTL_STATUS),
SOC15_REG_ENTRY_STR(GC, 0, regRMI_UTCL1_STATUS),
SOC15_REG_ENTRY_STR(GC, 0, regSQC_CACHES),
SOC15_REG_ENTRY_STR(GC, 0, regSQG_STATUS),
SOC15_REG_ENTRY_STR(GC, 0, regWD_UTCL1_STATUS),
SOC15_REG_ENTRY_STR(GC, 0, regGCVM_L2_PROTECTION_FAULT_CNTL),
SOC15_REG_ENTRY_STR(GC, 0, regGCVM_L2_PROTECTION_FAULT_STATUS_LO32),
SOC15_REG_ENTRY_STR(GC, 0, regGCVM_L2_PROTECTION_FAULT_STATUS_HI32),
SOC15_REG_ENTRY_STR(GC, 0, regCP_DEBUG),
SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_CNTL),
SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_CNTL),
SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_INSTR_PNTR),
SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_INSTR_PNTR),
SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_INSTR_PNTR),
SOC15_REG_ENTRY_STR(GC, 0, regCP_CPC_STATUS),
SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_RS64_INSTR_PNTR0),
SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_RS64_INSTR_PNTR1),
SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_RS64_INSTR_PNTR),
/* cp header registers */
/*
 * NOTE(review): the same register is listed eight times; presumably each
 * read returns the next header dword, so the repetition is intentional -
 * confirm before de-duplicating.
 */
SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP),
SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP),
SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP),
SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP),
SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP),
SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP),
SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP),
SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP),
/* SE status registers */
SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE0),
SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE1),
SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE2),
SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE3)
};
/*
 * Table of compute queue (CP_HQD_*) registers followed by eight entries
 * for the MEC ME1 header dump register.
 * NOTE(review): the consumer of this table is outside this part of the
 * file; presumably it is read once per selected compute queue - confirm.
 */
static const struct amdgpu_hwip_reg_entry gc_cp_reg_list_12[] = {
/* compute registers */
SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_VMID),
SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PERSISTENT_STATE),
SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PIPE_PRIORITY),
SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_QUEUE_PRIORITY),
SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_QUANTUM),
SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_BASE),
SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_BASE_HI),
SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_RPTR),
SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR),
SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR_HI),
SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL),
SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_CONTROL),
SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_IB_BASE_ADDR),
SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_IB_BASE_ADDR_HI),
SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_IB_RPTR),
SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_IB_CONTROL),
SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_DEQUEUE_REQUEST),
SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_BASE_ADDR),
SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_BASE_ADDR_HI),
SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_CONTROL),
SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_RPTR),
SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_WPTR),
SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_EVENTS),
SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CTX_SAVE_BASE_ADDR_LO),
SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CTX_SAVE_BASE_ADDR_HI),
SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CTX_SAVE_CONTROL),
SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CNTL_STACK_OFFSET),
SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CNTL_STACK_SIZE),
SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_WG_STATE_OFFSET),
SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CTX_SAVE_SIZE),
SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_GDS_RESOURCE_STATE),
SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_ERROR),
SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_WPTR_MEM),
SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_WPTR_LO),
SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_WPTR_HI),
SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_SUSPEND_CNTL_STACK_OFFSET),
SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_SUSPEND_CNTL_STACK_DW_CNT),
SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_SUSPEND_WG_STATE_OFFSET),
SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_DEQUEUE_STATUS),
/* cp header registers */
/*
 * NOTE(review): the same register is listed eight times; presumably each
 * read returns the next header dword - confirm before de-duplicating.
 */
SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP),
SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP),
SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP),
SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP),
SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP),
SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP),
SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP),
SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP),
};
/*
 * Table of gfx queue (CP_GFX_HQD_* / CP_GFX_MQD_*) registers plus ring
 * buffer and IB1 registers, followed by eight entries each for the PFP
 * and ME header dump registers.
 * NOTE(review): the consumer of this table is outside this part of the
 * file; presumably it is read once per selected gfx queue - confirm.
 */
static const struct amdgpu_hwip_reg_entry gc_gfx_queue_reg_list_12[] = {
/* gfx queue registers */
SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_ACTIVE),
SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_VMID),
SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_QUEUE_PRIORITY),
SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_QUANTUM),
SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_BASE),
SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_BASE_HI),
SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_OFFSET),
SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_CNTL),
SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_CSMD_RPTR),
SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_WPTR),
SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_WPTR_HI),
SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_DEQUEUE_REQUEST),
SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_MAPPED),
SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_QUE_MGR_CONTROL),
SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_HQ_CONTROL0),
SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_HQ_STATUS0),
SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_MQD_BASE_ADDR),
SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_MQD_BASE_ADDR_HI),
SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_WPTR_POLL_ADDR_LO),
SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_WPTR_POLL_ADDR_HI),
SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_RPTR),
SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BASE_LO),
SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BASE_HI),
SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_CMD_BUFSZ),
SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BUFSZ),
/* cp header registers */
/*
 * NOTE(review): each header dump register is listed eight times;
 * presumably each read returns the next header dword - confirm before
 * de-duplicating.
 */
SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP),
SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP),
SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP),
SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP),
SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP),
SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP),
SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP),
SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP),
SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
};
/*
 * Golden register settings table for DB_MEM_CONFIG, CB_HW_CONTROL_1 and
 * GL2C_CTRL5.
 * NOTE(review): the "_rev0" suffix suggests these apply only to an early
 * hardware revision, and the last two macro arguments are presumably the
 * AND mask and the OR value; the code that programs this table is outside
 * this part of the file - confirm.
 */
static const struct soc15_reg_golden golden_settings_gc_12_0_rev0[] = {
SOC15_REG_GOLDEN_VALUE(GC, 0, regDB_MEM_CONFIG, 0x0000000f, 0x0000000f),
SOC15_REG_GOLDEN_VALUE(GC, 0, regCB_HW_CONTROL_1, 0x03000000, 0x03000000),
SOC15_REG_GOLDEN_VALUE(GC, 0, regGL2C_CTRL5, 0x00000070, 0x00000020)
};
/*
 * Golden register settings table with a single DB_MEM_CONFIG entry.
 * NOTE(review): the last two macro arguments are presumably the AND mask
 * and the OR value; the code that programs this table is outside this
 * part of the file - confirm.
 */
static const struct soc15_reg_golden golden_settings_gc_12_0[] = {
SOC15_REG_GOLDEN_VALUE(GC, 0, regDB_MEM_CONFIG, 0x00008000, 0x00008000),
};
/*
 * Default SH_MEM_CONFIG value: ADDRESS_MODE field set to
 * SH_MEM_ADDRESS_MODE_64, ALIGNMENT_MODE field set to
 * SH_MEM_ALIGNMENT_MODE_UNALIGNED, and INITIAL_INST_PREFETCH field set to 3.
 */
#define DEFAULT_SH_MEM_CONFIG \
((SH_MEM_ADDRESS_MODE_64 << SH_MEM_CONFIG__ADDRESS_MODE__SHIFT) | \
(SH_MEM_ALIGNMENT_MODE_UNALIGNED << SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT) | \
(3 << SH_MEM_CONFIG__INITIAL_INST_PREFETCH__SHIFT))
/*
 * Forward declarations of file-local (static) functions so that they can
 * be referenced before their definitions appear.
 */
static void gfx_v12_0_disable_gpa_mode(struct amdgpu_device *adev);
static void gfx_v12_0_set_ring_funcs(struct amdgpu_device *adev);
static void gfx_v12_0_set_irq_funcs(struct amdgpu_device *adev);
static void gfx_v12_0_set_rlc_funcs(struct amdgpu_device *adev);
static void gfx_v12_0_set_mqd_funcs(struct amdgpu_device *adev);
static void gfx_v12_0_set_imu_funcs(struct amdgpu_device *adev);
static int gfx_v12_0_get_cu_info(struct amdgpu_device *adev,
struct amdgpu_cu_info *cu_info);
static uint64_t gfx_v12_0_get_gpu_clock_counter(struct amdgpu_device *adev);
static void gfx_v12_0_select_se_sh(struct amdgpu_device *adev, u32 se_num,
u32 sh_num, u32 instance, int xcc_id);
static u32 gfx_v12_0_get_wgp_active_bitmap_per_sh(struct amdgpu_device *adev);
static void gfx_v12_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start, bool secure);
static void gfx_v12_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
uint32_t val);
static int gfx_v12_0_wait_for_rlc_autoload_complete(struct amdgpu_device *adev);
static void gfx_v12_0_ring_invalidate_tlbs(struct amdgpu_ring *ring,
uint16_t pasid, uint32_t flush_type,
bool all_hub, uint8_t dst_sel);
static void gfx_v12_0_set_safe_mode(struct amdgpu_device *adev, int xcc_id);
static void gfx_v12_0_unset_safe_mode(struct amdgpu_device *adev, int xcc_id);
static void gfx_v12_0_update_perf_clk(struct amdgpu_device *adev,
bool enable);
/*
 * Emit a SET_RESOURCES packet on the KIQ ring.
 *
 * @kiq_ring:   ring the packet is written to
 * @queue_mask: 64-bit queue mask, emitted as two dwords (low, then high)
 *
 * Eight dwords are written in total: the packet header, the
 * vmid-mask/queue-type word, the two queue mask halves and four zero
 * dwords (gws mask lo, gws mask hi, oac mask and one trailing zero).
 */
static void gfx_v12_0_kiq_set_resources(struct amdgpu_ring *kiq_ring,
					uint64_t queue_mask)
{
	/* vmid_mask:0 queue_type:0 (KIQ) */
	const uint32_t res_ctrl = PACKET3_SET_RESOURCES_VMID_MASK(0) |
				  PACKET3_SET_RESOURCES_QUEUE_TYPE(0);
	const uint32_t qmask_lo = lower_32_bits(queue_mask);
	const uint32_t qmask_hi = upper_32_bits(queue_mask);
	int pad;

	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
	amdgpu_ring_write(kiq_ring, res_ctrl);
	amdgpu_ring_write(kiq_ring, qmask_lo);
	amdgpu_ring_write(kiq_ring, qmask_hi);

	/* gws mask lo, gws mask hi, oac mask, trailing zero dword */
	for (pad = 0; pad < 4; pad++)
		amdgpu_ring_write(kiq_ring, 0);
}
/*
 * Emit a MAP_QUEUES packet on the KIQ ring for @ring.
 *
 * @kiq_ring: ring the packet is written to
 * @ring:     queue being mapped; its type selects the ME and engine_sel
 *            values (compute: me 1 / eng 0, gfx: me 0 / eng 4,
 *            MES: me 2 / eng 5). Any other type triggers WARN_ON and is
 *            emitted with me 0 / eng 0.
 *
 * Seven dwords are written: header, control word, doorbell offset, MQD
 * address (lo, hi) and write-pointer address (lo, hi).
 */
static void gfx_v12_0_kiq_map_queues(struct amdgpu_ring *kiq_ring,
				     struct amdgpu_ring *ring)
{
	const uint64_t mqd_gpu_va = amdgpu_bo_gpu_offset(ring->mqd_obj);
	const uint64_t wptr_gpu_va = ring->wptr_gpu_addr;
	uint32_t me_id = 0;
	uint32_t engine = 0;
	uint32_t map_ctrl;

	if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
		me_id = 1;
		engine = 0;
	} else if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) {
		me_id = 0;
		engine = 4;
	} else if (ring->funcs->type == AMDGPU_RING_TYPE_MES) {
		me_id = 2;
		engine = 5;
	} else {
		WARN_ON(1);
	}

	/* queue_sel 0, vmid 0, queue_type 0, alloc_format 0, exactly one queue */
	map_ctrl = PACKET3_MAP_QUEUES_QUEUE_SEL(0) |
		   PACKET3_MAP_QUEUES_VMID(0) |
		   PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
		   PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
		   PACKET3_MAP_QUEUES_ME((me_id)) |
		   PACKET3_MAP_QUEUES_QUEUE_TYPE(0) |
		   PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) |
		   PACKET3_MAP_QUEUES_ENGINE_SEL(engine) |
		   PACKET3_MAP_QUEUES_NUM_QUEUES(1);

	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
	amdgpu_ring_write(kiq_ring, map_ctrl);
	amdgpu_ring_write(kiq_ring,
			  PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
	amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_gpu_va));
	amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_gpu_va));
	amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_gpu_va));
	amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_gpu_va));
}
/*
 * Emit an UNMAP_QUEUES packet on the KIQ ring for @ring.
 *
 * @kiq_ring: ring the packet is written to
 * @ring:     queue being unmapped/preempted (engine_sel is 4 for a gfx
 *            ring, 0 otherwise)
 * @action:   unmap action placed in the packet
 * @gpu_addr: address emitted only for PREEMPT_QUEUES_NO_UNMAP
 * @seq:      value emitted only for PREEMPT_QUEUES_NO_UNMAP
 *
 * When MES is enabled and the first KIQ ring's scheduler is not ready,
 * the request is handed to amdgpu_mes_unmap_legacy_queue() and nothing is
 * written to @kiq_ring. Otherwise six dwords are written.
 */
static void gfx_v12_0_kiq_unmap_queues(struct amdgpu_ring *kiq_ring,
				       struct amdgpu_ring *ring,
				       enum amdgpu_unmap_queues_action action,
				       u64 gpu_addr, u64 seq)
{
	struct amdgpu_device *adev = kiq_ring->adev;
	uint32_t engine = 0;

	if (ring->funcs->type == AMDGPU_RING_TYPE_GFX)
		engine = 4;

	if (adev->enable_mes && !adev->gfx.kiq[0].ring.sched.ready) {
		amdgpu_mes_unmap_legacy_queue(adev, ring, action, gpu_addr, seq);
		return;
	}

	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
	/* queue_sel 0, one queue */
	amdgpu_ring_write(kiq_ring,
			  PACKET3_UNMAP_QUEUES_ACTION(action) |
			  PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
			  PACKET3_UNMAP_QUEUES_ENGINE_SEL(engine) |
			  PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
	amdgpu_ring_write(kiq_ring,
			  PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));

	if (action != PREEMPT_QUEUES_NO_UNMAP) {
		/* the three trailing dwords are unused for other actions */
		amdgpu_ring_write(kiq_ring, 0);
		amdgpu_ring_write(kiq_ring, 0);
		amdgpu_ring_write(kiq_ring, 0);
		return;
	}

	amdgpu_ring_write(kiq_ring, lower_32_bits(gpu_addr));
	amdgpu_ring_write(kiq_ring, upper_32_bits(gpu_addr));
	amdgpu_ring_write(kiq_ring, seq);
}
/*
 * Emit a QUERY_STATUS packet on the KIQ ring for @ring.
 *
 * @kiq_ring: ring the packet is written to
 * @ring:     queue being queried (engine_sel is 4 for a gfx ring, 0
 *            otherwise); identified by its doorbell index
 * @addr:     64-bit address emitted as two dwords (lo, hi)
 * @seq:      64-bit value emitted as two dwords (lo, hi)
 *
 * Seven dwords are written in total.
 */
static void gfx_v12_0_kiq_query_status(struct amdgpu_ring *kiq_ring,
				       struct amdgpu_ring *ring,
				       u64 addr, u64 seq)
{
	uint32_t engine = 0;
	uint32_t query_ctrl;
	uint32_t queue_sel;

	if (ring->funcs->type == AMDGPU_RING_TYPE_GFX)
		engine = 4;

	/* context_id 0, interrupt_sel 0, command 2 */
	query_ctrl = PACKET3_QUERY_STATUS_CONTEXT_ID(0) |
		     PACKET3_QUERY_STATUS_INTERRUPT_SEL(0) |
		     PACKET3_QUERY_STATUS_COMMAND(2);
	queue_sel = PACKET3_QUERY_STATUS_DOORBELL_OFFSET(ring->doorbell_index) |
		    PACKET3_QUERY_STATUS_ENG_SEL(engine);

	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_QUERY_STATUS, 5));
	amdgpu_ring_write(kiq_ring, query_ctrl);
	amdgpu_ring_write(kiq_ring, queue_sel);
	amdgpu_ring_write(kiq_ring, lower_32_bits(addr));
	amdgpu_ring_write(kiq_ring, upper_32_bits(addr));
	amdgpu_ring_write(kiq_ring, lower_32_bits(seq));
	amdgpu_ring_write(kiq_ring, upper_32_bits(seq));
}
/*
 * KIQ variant of TLB invalidation: forwards to
 * gfx_v12_0_ring_invalidate_tlbs() with dst_sel fixed to 1.
 *
 * @kiq_ring:   ring the invalidation is emitted on
 * @pasid:      passed through unchanged
 * @flush_type: passed through unchanged
 * @all_hub:    passed through unchanged
 */
static void gfx_v12_0_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring,
					  uint16_t pasid,
					  uint32_t flush_type,
					  bool all_hub)
{
	const uint8_t dst_sel = 1;

	gfx_v12_0_ring_invalidate_tlbs(kiq_ring, pasid, flush_type, all_hub,
				       dst_sel);
}
/*
 * KIQ PM4 callback table for gfx v12.
 *
 * The *_size fields match the number of dwords the corresponding emit
 * function writes to the ring: set_resources writes 8, map_queues 7,
 * unmap_queues 6 and query_status 7. invalidate_tlbs_size covers
 * gfx_v12_0_ring_invalidate_tlbs(), which is defined outside this part of
 * the file.
 */
static const struct kiq_pm4_funcs gfx_v12_0_kiq_pm4_funcs = {
.kiq_set_resources = gfx_v12_0_kiq_set_resources,
.kiq_map_queues = gfx_v12_0_kiq_map_queues,
.kiq_unmap_queues = gfx_v12_0_kiq_unmap_queues,
.kiq_query_status = gfx_v12_0_kiq_query_status,
.kiq_invalidate_tlbs = gfx_v12_0_kiq_invalidate_tlbs,
.set_resources_size = 8,
.map_queues_size = 7,
.unmap_queues_size = 6,
.query_status_size = 7,
.invalidate_tlbs_size = 2,
};
/* Point the first KIQ instance at the gfx v12 PM4 callback table. */
static void gfx_v12_0_set_kiq_pm4_funcs(struct amdgpu_device *adev)
{
	/* kiq->pmf is the same element as kiq[0].pmf */
	adev->gfx.kiq->pmf = &gfx_v12_0_kiq_pm4_funcs;
}
/*
 * Emit a WAIT_REG_MEM packet (seven dwords) on @ring.
 *
 * @ring:      ring the packet is written to
 * @eng_sel:   value for the ENGINE field
 * @mem_space: value for the MEM_SPACE field: memory (1) or register (0)
 * @opt:       value for the OPERATION field
 * @addr0:     first address dword; must be dword aligned when @mem_space
 *             is non-zero (enforced with BUG_ON)
 * @addr1:     second address dword
 * @ref:       reference value
 * @mask:      mask value
 * @inv:       poll interval
 *
 * The compare function is fixed to 3 (equal).
 */
static void gfx_v12_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
				   int mem_space, int opt, uint32_t addr0,
				   uint32_t addr1, uint32_t ref,
				   uint32_t mask, uint32_t inv)
{
	const uint32_t wait_ctrl = WAIT_REG_MEM_MEM_SPACE(mem_space) |
				   WAIT_REG_MEM_OPERATION(opt) |
				   WAIT_REG_MEM_FUNCTION(3) |
				   WAIT_REG_MEM_ENGINE(eng_sel);

	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, wait_ctrl);

	/* memory addresses must be dword aligned */
	BUG_ON(mem_space && (addr0 & 0x3));

	amdgpu_ring_write(ring, addr0);
	amdgpu_ring_write(ring, addr1);
	amdgpu_ring_write(ring, ref);
	amdgpu_ring_write(ring, mask);
	amdgpu_ring_write(ring, inv);
}
/*
 * Basic ring test: have the ring write a magic value to SCRATCH_REG0 and
 * poll the register until the value shows up.
 *
 * The register is first set to 0xCAFEDEAD from the CPU. A KIQ ring writes
 * 0xDEADBEEF through gfx_v12_0_ring_emit_wreg(); any other ring uses a
 * SET_UCONFIG_REG packet. The register is then polled up to
 * adev->usec_timeout times, sleeping 1 ms per iteration in emulation mode
 * (amdgpu_emu_mode == 1) and delaying 1 us otherwise.
 *
 * Returns 0 on success, the amdgpu_ring_alloc() error if ring space could
 * not be reserved, or -ETIMEDOUT if the value never appeared.
 */
static int gfx_v12_0_ring_test_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	const uint32_t scratch = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG0);
	unsigned int tries;
	int ret;

	WREG32(scratch, 0xCAFEDEAD);

	ret = amdgpu_ring_alloc(ring, 5);
	if (ret) {
		dev_err(adev->dev,
			"amdgpu: cp failed to lock ring %d (%d).\n",
			ring->idx, ret);
		return ret;
	}

	if (ring->funcs->type != AMDGPU_RING_TYPE_KIQ) {
		amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
		amdgpu_ring_write(ring, scratch - PACKET3_SET_UCONFIG_REG_START);
		amdgpu_ring_write(ring, 0xDEADBEEF);
	} else {
		gfx_v12_0_ring_emit_wreg(ring, scratch, 0xDEADBEEF);
	}
	amdgpu_ring_commit(ring);

	/* assume failure until the magic value is observed */
	ret = -ETIMEDOUT;
	for (tries = 0; tries < adev->usec_timeout; tries++) {
		if (RREG32(scratch) == 0xDEADBEEF) {
			ret = 0;
			break;
		}

		if (amdgpu_emu_mode == 1)
			msleep(1);
		else
			udelay(1);
	}

	return ret;
}
/*
 * Indirect buffer test: submit a small IB whose WRITE_DATA packet stores
 * 0xDEADBEEF into a writeback slot that was pre-filled with 0xCAFEDEAD,
 * wait for the fence and check the slot.
 *
 * @ring:    ring to test
 * @timeout: timeout handed to dma_fence_wait_timeout()
 *
 * Returns 0 on success (and immediately for a KIQ ring when MES KIQ is
 * enabled), -ETIMEDOUT if the fence wait timed out, -EINVAL if the slot
 * does not hold the expected value, or the error reported by the
 * writeback/IB/fence helpers.
 */
static int gfx_v12_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
	struct amdgpu_device *adev = ring->adev;
	struct dma_fence *fence = NULL;
	volatile uint32_t *wb_cpu;
	struct amdgpu_ib ib;
	uint64_t wb_gpu;
	unsigned wb_slot;
	long ret;

	/* MES KIQ fw hasn't indirect buffer support for now */
	if (adev->enable_mes_kiq &&
	    ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
		return 0;

	memset(&ib, 0, sizeof(ib));

	ret = amdgpu_device_wb_get(adev, &wb_slot);
	if (ret)
		return ret;

	wb_gpu = adev->wb.gpu_addr + (wb_slot * 4);
	adev->wb.wb[wb_slot] = cpu_to_le32(0xCAFEDEAD);
	wb_cpu = &adev->wb.wb[wb_slot];

	ret = amdgpu_ib_get(adev, NULL, 16, AMDGPU_IB_POOL_DIRECT, &ib);
	if (ret) {
		dev_err(adev->dev, "amdgpu: failed to get ib (%ld).\n", ret);
		goto release_wb;
	}

	ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
	ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
	ib.ptr[2] = lower_32_bits(wb_gpu);
	ib.ptr[3] = upper_32_bits(wb_gpu);
	ib.ptr[4] = 0xDEADBEEF;
	ib.length_dw = 5;

	ret = amdgpu_ib_schedule(ring, 1, &ib, NULL, &fence);
	if (ret)
		goto release_ib;

	ret = dma_fence_wait_timeout(fence, false, timeout);
	if (ret <= 0) {
		/* zero means the wait timed out; negative is an error code */
		if (!ret)
			ret = -ETIMEDOUT;
		goto release_ib;
	}

	ret = (le32_to_cpu(*wb_cpu) == 0xDEADBEEF) ? 0 : -EINVAL;

release_ib:
	amdgpu_ib_free(&ib, NULL);
	dma_fence_put(fence);
release_wb:
	amdgpu_device_wb_free(adev, wb_slot);
	return ret;
}
/*
 * Release the PFP, ME, RLC and MEC firmware images and free the RLC
 * register list format buffer.
 */
static void gfx_v12_0_free_microcode(struct amdgpu_device *adev)
{
amdgpu_ucode_release(&adev->gfx.pfp_fw);
amdgpu_ucode_release(&adev->gfx.me_fw);
amdgpu_ucode_release(&adev->gfx.rlc_fw);
amdgpu_ucode_release(&adev->gfx.mec_fw);
/* NOTE(review): presumably allocated by the RLC microcode init helper - confirm */
kfree(adev->gfx.rlc.register_list_format);
}
/*
 * Request the "<prefix>_toc.bin" firmware and record its version, feature
 * version, size and start address in adev->psp.toc.
 *
 * @adev:         device
 * @ucode_prefix: firmware file name prefix
 *
 * Returns 0 on success. On failure the TOC firmware reference is released
 * and the error from amdgpu_ucode_request() is returned.
 */
static int gfx_v12_0_init_toc_microcode(struct amdgpu_device *adev, const char *ucode_prefix)
{
	const struct psp_firmware_header_v1_0 *hdr;
	int ret;

	ret = amdgpu_ucode_request(adev, &adev->psp.toc_fw,
				   AMDGPU_UCODE_REQUIRED,
				   "amdgpu/%s_toc.bin", ucode_prefix);
	if (ret) {
		amdgpu_ucode_release(&adev->psp.toc_fw);
		return ret;
	}

	hdr = (const struct psp_firmware_header_v1_0 *)adev->psp.toc_fw->data;

	adev->psp.toc.fw_version = le32_to_cpu(hdr->header.ucode_version);
	adev->psp.toc.feature_version = le32_to_cpu(hdr->sos.fw_version);
	adev->psp.toc.size_bytes = le32_to_cpu(hdr->header.ucode_size_bytes);
	/* the payload starts at the offset recorded in the common header */
	adev->psp.toc.start_addr = (uint8_t *)hdr +
		le32_to_cpu(hdr->header.ucode_array_offset_bytes);

	return 0;
}
static int gfx_v12_0_init_microcode(struct amdgpu_device *adev)
{
char ucode_prefix[30];
int err;
const struct rlc_firmware_header_v2_0 *rlc_hdr;
uint16_t version_major;
uint16_t version_minor;
DRM_DEBUG("\n" );
amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix, sizeof (ucode_prefix));
err = amdgpu_ucode_request(adev, &adev->gfx.pfp_fw,
AMDGPU_UCODE_REQUIRED,
"amdgpu/%s_pfp.bin" , ucode_prefix);
if (err)
goto out;
amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_PFP);
amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_PFP_P0_STACK);
err = amdgpu_ucode_request(adev, &adev->gfx.me_fw,
AMDGPU_UCODE_REQUIRED,
"amdgpu/%s_me.bin" , ucode_prefix);
if (err)
goto out;
amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_ME);
amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_ME_P0_STACK);
if (!amdgpu_sriov_vf(adev)) {
if (amdgpu_is_kicker_fw(adev))
err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw,
AMDGPU_UCODE_REQUIRED,
"amdgpu/%s_rlc_kicker.bin" , ucode_prefix);
else
err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw,
AMDGPU_UCODE_REQUIRED,
"amdgpu/%s_rlc.bin" , ucode_prefix);
if (err)
goto out;
rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
err = amdgpu_gfx_rlc_init_microcode(adev, version_major, version_minor);
if (err)
goto out;
}
err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw,
AMDGPU_UCODE_REQUIRED,
"amdgpu/%s_mec.bin" , ucode_prefix);
if (err)
goto out;
amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC);
amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC_P0_STACK);
amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC_P1_STACK);
if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO)
err = gfx_v12_0_init_toc_microcode(adev, ucode_prefix);
/* only one MEC for gfx 12 */
adev->gfx.mec2_fw = NULL;
if (adev->gfx.imu.funcs) {
if (adev->gfx.imu.funcs->init_microcode) {
err = adev->gfx.imu.funcs->init_microcode(adev);
if (err)
dev_err(adev->dev, "Failed to load imu firmware!\n" );
}
}
out:
if (err) {
amdgpu_ucode_release(&adev->gfx.pfp_fw);
amdgpu_ucode_release(&adev->gfx.me_fw);
amdgpu_ucode_release(&adev->gfx.rlc_fw);
amdgpu_ucode_release(&adev->gfx.mec_fw);
}
return err;
}
/*
 * Compute the clear state buffer size for gfx12_cs_data.
 *
 * The count starts at one and grows by (2 + reg_count) for every extent
 * of every SECT_CONTEXT section. @adev is not used.
 *
 * Returns the accumulated count, or 0 if any section is not SECT_CONTEXT.
 * NOTE(review): the unit is presumably dwords, matching the u32 layout
 * written by gfx_v12_0_get_csb_buffer() - confirm against the caller.
 */
static u32 gfx_v12_0_get_csb_size(struct amdgpu_device *adev)
{
	const struct cs_section_def *sec;
	const struct cs_extent_def *blk;
	u32 total = 1;

	for (sec = gfx12_cs_data; sec->section; sec++) {
		if (sec->id != SECT_CONTEXT)
			return 0;

		for (blk = sec->section; blk->extent; blk++)
			total += 2 + blk->reg_count;
	}

	return total;
}
/*
 * Fill @buffer with the clear state data from adev->gfx.rlc.cs_data.
 *
 * Layout: buffer[0] holds the number of extents ("clusters"); each extent
 * then contributes reg_count, reg_index and reg_count register values
 * (the values are converted with cpu_to_le32).
 *
 * Nothing is written if cs_data or @buffer is NULL. If a section that is
 * not SECT_CONTEXT is encountered, the function returns immediately and
 * buffer[0] is left unwritten.
 */
static void gfx_v12_0_get_csb_buffer(struct amdgpu_device *adev,
				     volatile u32 *buffer)
{
	const struct cs_section_def *sec;
	const struct cs_extent_def *blk;
	u32 pos = 1;		/* slot 0 is reserved for the cluster count */
	u32 clusters = 0;
	u32 reg;

	if (!adev->gfx.rlc.cs_data || !buffer)
		return;

	for (sec = adev->gfx.rlc.cs_data; sec->section; sec++) {
		if (sec->id != SECT_CONTEXT)
			return;

		for (blk = sec->section; blk->extent; blk++) {
			clusters++;
			buffer[pos++] = blk->reg_count;
			buffer[pos++] = blk->reg_index;

			for (reg = 0; reg < blk->reg_count; reg++)
				buffer[pos++] = cpu_to_le32(blk->extent[reg]);
		}
	}

	buffer[0] = clusters;
}
/*
 * Free the RLC buffer objects: the clear state block and the CP jump
 * table block. For each one the BO handle, GPU address and CPU pointer
 * fields are passed to amdgpu_bo_free_kernel().
 */
static void gfx_v12_0_rlc_fini(struct amdgpu_device *adev)
{
/* clear state block */
amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
&adev->gfx.rlc.clear_state_gpu_addr,
(void **)&adev->gfx.rlc.cs_ptr);
/* jump table block */
amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
&adev->gfx.rlc.cp_table_gpu_addr,
(void **)&adev->gfx.rlc.cp_table_ptr);
}
/*
 * Fill in the first RLCG register access control entry with the offsets
 * of SCRATCH_REG0..3, GRBM_GFX_CNTL, GRBM_GFX_INDEX and RLC_SPARE_INT_0,
 * then mark RLCG register access as supported.
 */
static void gfx_v12_0_init_rlcg_reg_access_ctrl(struct amdgpu_device *adev)
{
	struct amdgpu_rlcg_reg_access_ctrl *ctl = &adev->gfx.rlc.reg_access_ctrl[0];

	/* scratch registers */
	ctl->scratch_reg0 = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG0);
	ctl->scratch_reg1 = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG1);
	ctl->scratch_reg2 = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG2);
	ctl->scratch_reg3 = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG3);

	/* GRBM select registers and the spare interrupt register */
	ctl->grbm_cntl = SOC15_REG_OFFSET(GC, 0, regGRBM_GFX_CNTL);
	ctl->grbm_idx = SOC15_REG_OFFSET(GC, 0, regGRBM_GFX_INDEX);
	ctl->spare_int = SOC15_REG_OFFSET(GC, 0, regRLC_SPARE_INT_0);

	adev->gfx.rlc.rlcg_reg_access_supported = true;
}
/*
 * RLC software init: select gfx12_cs_data as the clear state data, build
 * the clear state block through amdgpu_gfx_rlc_init_csb(), and set the
 * SPM VMID to 0xf when the RLC callbacks provide update_spm_vmid.
 *
 * Returns 0 on success or the amdgpu_gfx_rlc_init_csb() error.
 */
static int gfx_v12_0_rlc_init(struct amdgpu_device *adev)
{
	adev->gfx.rlc.cs_data = gfx12_cs_data;

	if (adev->gfx.rlc.cs_data) {
		/* init clear state block */
		int ret = amdgpu_gfx_rlc_init_csb(adev);

		if (ret)
			return ret;
	}

	/* init spm vmid with 0xf */
	if (adev->gfx.rlc.funcs->update_spm_vmid)
		adev->gfx.rlc.funcs->update_spm_vmid(adev, NULL, 0xf);

	return 0;
}
/*
 * Free the MEC buffer objects: the HPD/EOP buffer, the MEC firmware
 * buffer and the MEC firmware data buffer. NULL is passed for the GPU
 * address and CPU pointer arguments of amdgpu_bo_free_kernel().
 */
static void gfx_v12_0_mec_fini(struct amdgpu_device *adev)
{
amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL);
amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_data_obj, NULL, NULL);
}
/*
 * Clear the ME (gfx) queue bitmap and then call
 * amdgpu_gfx_graphics_queue_acquire().
 * NOTE(review): presumably that helper marks the gfx queues owned by the
 * kernel driver in the bitmap - confirm.
 */
static void gfx_v12_0_me_init(struct amdgpu_device *adev)
{
bitmap_zero(adev->gfx.me.queue_bitmap, AMDGPU_MAX_GFX_QUEUES);
amdgpu_gfx_graphics_queue_acquire(adev);
}
/*
 * MEC software init: clear the compute queue bitmap, acquire the compute
 * queues, and allocate a zeroed HPD/EOP buffer in GTT sized
 * num_compute_rings * GFX12_MEC_HPD_SIZE bytes.
 *
 * No buffer is allocated when there are no compute rings.
 *
 * Returns 0 on success. If the allocation fails, gfx_v12_0_mec_fini() is
 * called and the allocation error is returned.
 */
static int gfx_v12_0_mec_init(struct amdgpu_device *adev)
{
	size_t mec_hpd_size;
	u32 *eop_buf;
	int ret;

	bitmap_zero(adev->gfx.mec_bitmap[0].queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);

	/* take ownership of the relevant compute queues */
	amdgpu_gfx_compute_queue_acquire(adev);

	mec_hpd_size = adev->gfx.num_compute_rings * GFX12_MEC_HPD_SIZE;
	if (!mec_hpd_size)
		return 0;

	ret = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
					AMDGPU_GEM_DOMAIN_GTT,
					&adev->gfx.mec.hpd_eop_obj,
					&adev->gfx.mec.hpd_eop_gpu_addr,
					(void **)&eop_buf);
	if (ret) {
		dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", ret);
		gfx_v12_0_mec_fini(adev);
		return ret;
	}

	/* zero the buffer, then drop the CPU mapping and the reservation */
	memset(eop_buf, 0, mec_hpd_size);
	amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
	amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);

	return 0;
}
/*
 * Read one indirect SQ register: program SQ_IND_INDEX with the wave id
 * and register index, then return the value read from SQ_IND_DATA.
 */
static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t wave, uint32_t address)
{
	const uint32_t index_sel = (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
				   (address << SQ_IND_INDEX__INDEX__SHIFT);

	WREG32_SOC15(GC, 0, regSQ_IND_INDEX, index_sel);

	return RREG32_SOC15(GC, 0, regSQ_IND_DATA);
}
/*
 * Read @num consecutive indirect SQ registers into @out.
 *
 * SQ_IND_INDEX is programmed once with the wave id, starting register
 * index, work-item id and the AUTO_INCR bit; SQ_IND_DATA is then read
 * @num times.
 */
static void wave_read_regs(struct amdgpu_device *adev, uint32_t wave,
			   uint32_t thread, uint32_t regno,
			   uint32_t num, uint32_t *out)
{
	uint32_t n;

	WREG32_SOC15(GC, 0, regSQ_IND_INDEX,
		     (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
		     (regno << SQ_IND_INDEX__INDEX__SHIFT) |
		     (thread << SQ_IND_INDEX__WORKITEM_ID__SHIFT) |
		     (SQ_IND_INDEX__AUTO_INCR_MASK));

	for (n = 0; n < num; n++)
		out[n] = RREG32_SOC15(GC, 0, regSQ_IND_DATA);
}
/*
 * Append the state of one wave to @dst.
 *
 * @adev:      device
 * @xcc_id:    not used here
 * @simd:      expected to be 0 (WARN_ON otherwise); in gfx12 the SIMD_ID
 *             is specified as part of the INSTANCE field when performing
 *             a select_se_sh
 * @wave:      wave id to read
 * @dst:       output array
 * @no_fields: in/out index of the next free slot in @dst
 *
 * The first value written is the wave data type (4), followed by 23
 * indirect SQ_WAVE_* register values.
 */
static void gfx_v12_0_read_wave_data(struct amdgpu_device *adev,
				     uint32_t xcc_id,
				     uint32_t simd, uint32_t wave,
				     uint32_t *dst, int *no_fields)
{
	int idx = *no_fields;

	WARN_ON(simd != 0);

	/* type 4 wave data */
	dst[idx++] = 4;
	dst[idx++] = wave_read_ind(adev, wave, ixSQ_WAVE_STATUS);
	dst[idx++] = wave_read_ind(adev, wave, ixSQ_WAVE_PC_LO);
	dst[idx++] = wave_read_ind(adev, wave, ixSQ_WAVE_PC_HI);
	dst[idx++] = wave_read_ind(adev, wave, ixSQ_WAVE_EXEC_LO);
	dst[idx++] = wave_read_ind(adev, wave, ixSQ_WAVE_EXEC_HI);
	dst[idx++] = wave_read_ind(adev, wave, ixSQ_WAVE_HW_ID1);
	dst[idx++] = wave_read_ind(adev, wave, ixSQ_WAVE_HW_ID2);
	dst[idx++] = wave_read_ind(adev, wave, ixSQ_WAVE_GPR_ALLOC);
	dst[idx++] = wave_read_ind(adev, wave, ixSQ_WAVE_LDS_ALLOC);
	dst[idx++] = wave_read_ind(adev, wave, ixSQ_WAVE_IB_STS);
	dst[idx++] = wave_read_ind(adev, wave, ixSQ_WAVE_IB_STS2);
	dst[idx++] = wave_read_ind(adev, wave, ixSQ_WAVE_IB_DBG1);
	dst[idx++] = wave_read_ind(adev, wave, ixSQ_WAVE_M0);
	dst[idx++] = wave_read_ind(adev, wave, ixSQ_WAVE_MODE);
	dst[idx++] = wave_read_ind(adev, wave, ixSQ_WAVE_STATE_PRIV);
	dst[idx++] = wave_read_ind(adev, wave, ixSQ_WAVE_EXCP_FLAG_PRIV);
	dst[idx++] = wave_read_ind(adev, wave, ixSQ_WAVE_EXCP_FLAG_USER);
	dst[idx++] = wave_read_ind(adev, wave, ixSQ_WAVE_TRAP_CTRL);
	dst[idx++] = wave_read_ind(adev, wave, ixSQ_WAVE_ACTIVE);
	dst[idx++] = wave_read_ind(adev, wave, ixSQ_WAVE_VALID_AND_IDLE);
	dst[idx++] = wave_read_ind(adev, wave, ixSQ_WAVE_DVGPR_ALLOC_LO);
	dst[idx++] = wave_read_ind(adev, wave, ixSQ_WAVE_DVGPR_ALLOC_HI);
	dst[idx++] = wave_read_ind(adev, wave, ixSQ_WAVE_SCHED_MODE);

	/* publish the new fill level to the caller */
	*no_fields = idx;
}
/*
 * Read @size SGPR values of @wave into @dst, starting at SGPR @start.
 *
 * The read goes through wave_read_regs() with thread 0 and the register
 * index offset by SQIND_WAVE_SGPRS_OFFSET. @simd is expected to be 0
 * (WARN_ON otherwise); @xcc_id is not used here.
 */
static void gfx_v12_0_read_wave_sgprs(struct amdgpu_device *adev,
				      uint32_t xcc_id, uint32_t simd,
				      uint32_t wave, uint32_t start,
				      uint32_t size, uint32_t *dst)
{
	const uint32_t first_reg = start + SQIND_WAVE_SGPRS_OFFSET;

	WARN_ON(simd != 0);

	wave_read_regs(adev, wave, 0, first_reg, size, dst);
}
/*
 * Read @size VGPR values of @thread in @wave into @dst, starting at VGPR
 * @start.
 *
 * The read goes through wave_read_regs() with the register index offset
 * by SQIND_WAVE_VGPRS_OFFSET. @xcc_id and @simd are not used here.
 */
static void gfx_v12_0_read_wave_vgprs(struct amdgpu_device *adev,
				      uint32_t xcc_id, uint32_t simd,
				      uint32_t wave, uint32_t thread,
				      uint32_t start, uint32_t size,
				      uint32_t *dst)
{
	const uint32_t first_reg = start + SQIND_WAVE_VGPRS_OFFSET;

	wave_read_regs(adev, wave, thread, first_reg, size, dst);
}
/*
 * Select the given me/pipe/queue/vmid by forwarding to
 * soc24_grbm_select(). The xcc_id argument is not used.
 */
static void gfx_v12_0_select_me_pipe_q(struct amdgpu_device *adev,
u32 me, u32 pipe, u32 q, u32 vm, u32 xcc_id)
{
soc24_grbm_select(adev, me, pipe, q, vm);
}
/*
 * Gfx shadow buffer and firmware work area (CSA) geometry.
 * Every size and alignment below is expressed in bytes.
 */
#define MQD_SHADOW_BASE_SIZE (72 * 1024)
#define MQD_SHADOW_BASE_ALIGNMENT 256
#define MQD_FWWORKAREA_SIZE 484
#define MQD_FWWORKAREA_ALIGNMENT 256
/*
 * Unconditionally fill @shadow_info with the fixed gfx12 shadow buffer
 * and CSA sizes and alignments (all in bytes). Only these four fields are
 * written; @adev is not used.
 */
static void gfx_v12_0_get_gfx_shadow_info_nocheck(struct amdgpu_device *adev,
						  struct amdgpu_gfx_shadow_info *shadow_info)
{
	/* firmware work area (CSA) */
	shadow_info->csa_alignment = MQD_FWWORKAREA_ALIGNMENT;
	shadow_info->csa_size = MQD_FWWORKAREA_SIZE;

	/* register shadow area */
	shadow_info->shadow_alignment = MQD_SHADOW_BASE_ALIGNMENT;
	shadow_info->shadow_size = MQD_SHADOW_BASE_SIZE;
}
/*
 * Report the gfx shadow buffer geometry.
 *
 * @adev:        device
 * @shadow_info: output structure
 * @skip_check:  when true, report the geometry even if
 *               adev->gfx.cp_gfx_shadow is not set
 *
 * Returns 0 with @shadow_info filled in when cp_gfx_shadow is set or
 * @skip_check is true; otherwise zeroes @shadow_info and returns -EINVAL.
 */
static int gfx_v12_0_get_gfx_shadow_info(struct amdgpu_device *adev,
					 struct amdgpu_gfx_shadow_info *shadow_info,
					 bool skip_check)
{
	if (!adev->gfx.cp_gfx_shadow && !skip_check) {
		memset(shadow_info, 0, sizeof(*shadow_info));
		return -EINVAL;
	}

	gfx_v12_0_get_gfx_shadow_info_nocheck(adev, shadow_info);

	return 0;
}
/* GFX v12 implementations of the amdgpu_gfx_funcs callbacks. */
static const struct amdgpu_gfx_funcs gfx_v12_0_gfx_funcs = {
	.get_gpu_clock_counter	= gfx_v12_0_get_gpu_clock_counter,
	.select_se_sh		= gfx_v12_0_select_se_sh,
	.read_wave_data		= gfx_v12_0_read_wave_data,
	.read_wave_sgprs	= gfx_v12_0_read_wave_sgprs,
	.read_wave_vgprs	= gfx_v12_0_read_wave_vgprs,
	.select_me_pipe_q	= gfx_v12_0_select_me_pipe_q,
	.update_perfmon_mgcg	= gfx_v12_0_update_perf_clk,
	.get_gfx_shadow_info	= gfx_v12_0_get_gfx_shadow_info,
};
static int gfx_v12_0_gpu_early_init(struct amdgpu_device *adev)
{
switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
case IP_VERSION(12, 0, 0):
case IP_VERSION(12, 0, 1):
adev->gfx.config.max_hw_contexts = 8;
adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
adev->gfx.config.sc_hiz_tile_fifo_size = 0;
adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
break ;
default :
BUG();
break ;
}
return 0;
}
/*
 * Initialise the software state of gfx ring @ring_id and register it with
 * amdgpu_ring_init().
 *
 * @me, @pipe and @queue are recorded on the ring and used to build its
 * name ("gfx_<me>.<pipe>.<queue>").  Ring 0 uses the gfx_ring0 doorbell,
 * any other ring the gfx_ring1 doorbell; the index is shifted left by one.
 * The EOP interrupt type is selected by the ring's pipe.
 *
 * Returns the result of amdgpu_ring_init().
 */
static int gfx_v12_0_gfx_ring_init(struct amdgpu_device *adev, int ring_id,
				   int me, int pipe, int queue)
{
	struct amdgpu_ring *ring = &adev->gfx.gfx_ring[ring_id];
	unsigned int irq_type;

	ring->me = me;
	ring->pipe = pipe;
	ring->queue = queue;

	ring->ring_obj = NULL;
	ring->use_doorbell = true;
	ring->doorbell_index = (ring_id ? adev->doorbell_index.gfx_ring1 :
					  adev->doorbell_index.gfx_ring0) << 1;
	ring->vm_hub = AMDGPU_GFXHUB(0);

	sprintf(ring->name, "gfx_%d.%d.%d", ring->me, ring->pipe, ring->queue);

	irq_type = AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP + ring->pipe;

	return amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type,
				AMDGPU_RING_PRIO_DEFAULT, NULL);
}
/*
 * Initialise the software state of compute ring @ring_id and register it
 * with amdgpu_ring_init().
 *
 * @mec is zero-based; the ring's "me" is stored as @mec + 1.  Each ring
 * gets its own doorbell (mec_ring0 + @ring_id, shifted left by one) and its
 * own GFX12_MEC_HPD_SIZE-byte slice of the MEC HPD EOP buffer.  The EOP
 * interrupt type is derived from the ring's me and pipe, and the hardware
 * priority from amdgpu_gfx_is_high_priority_compute_queue().
 *
 * Returns the result of amdgpu_ring_init().
 */
static int gfx_v12_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
				       int mec, int pipe, int queue)
{
	struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
	unsigned int irq_type;
	unsigned int hw_prio;

	/* mec0 is me1 */
	ring->me = mec + 1;
	ring->pipe = pipe;
	ring->queue = queue;

	ring->ring_obj = NULL;
	ring->use_doorbell = true;
	ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1;
	ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
				+ (ring_id * GFX12_MEC_HPD_SIZE);
	ring->vm_hub = AMDGPU_GFXHUB(0);

	sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);

	irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
		+ ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
		+ ring->pipe;

	if (amdgpu_gfx_is_high_priority_compute_queue(adev, ring))
		hw_prio = AMDGPU_GFX_PIPE_PRIO_HIGH;
	else
		hw_prio = AMDGPU_GFX_PIPE_PRIO_NORMAL;

	return amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type,
				hw_prio, NULL);
}
/*
 * Per-firmware placement table for RLC backdoor autoload, indexed by
 * SOC24_FIRMWARE_ID.  It is filled from the RLC table of contents by
 * gfx_v12_0_parse_rlc_toc() and consulted when copying firmware images
 * into the autoload buffer.
 */
static struct {
SOC24_FIRMWARE_ID id;
/* byte offset of this firmware inside the autoload buffer */
unsigned int offset;
/* size in bytes reserved for this firmware */
unsigned int size;
/* NOTE(review): not written by the TOC parser in this file - confirm whether it is still needed */
unsigned int size_x16;
} rlc_autoload_info[SOC24_FIRMWARE_ID_MAX];
/* TOC offsets are multiplied by this value and by 4 to obtain bytes */
#define RLC_TOC_OFFSET_DWUNIT 8
/* TOC sizes flagged size_x16 are multiplied by this value and by 4 to obtain bytes */
#define RLC_SIZE_MULTIPLE 1024
/* lower bound for the autoload buffer size, in MiB (shifted left by 20 when used) */
#define RLC_TOC_UMF_SIZE_inM 23ULL
/* value placed in bits 31:24 of the word patched into the TOC image before it is copied */
#define RLC_TOC_FORMAT_API 165ULL
static void gfx_v12_0_parse_rlc_toc(struct amdgpu_device *adev, void *rlc_toc)
{
RLC_TABLE_OF_CONTENT_V2 *ucode = rlc_toc;
while (ucode && (ucode->id > SOC24_FIRMWARE_ID_INVALID)) {
rlc_autoload_info[ucode->id].id = ucode->id;
rlc_autoload_info[ucode->id].offset =
ucode->offset * RLC_TOC_OFFSET_DWUNIT * 4;
rlc_autoload_info[ucode->id].size =
ucode->size_x16 ? ucode->size * RLC_SIZE_MULTIPLE * 4 :
ucode->size * 4;
ucode++;
}
}
/*
 * Parse the PSP-provided TOC and compute how many bytes the RLC autoload
 * buffer needs.
 *
 * The result starts as the sum of all firmware sizes from
 * SOC24_FIRMWARE_ID_RLC_G_UCODE onwards.  If that sum is smaller than the
 * offset of the last table entry (offsets may be aligned), the end of the
 * last entry is used instead.  The result is never smaller than
 * RLC_TOC_UMF_SIZE_inM MiB.
 */
static uint32_t gfx_v12_0_calc_toc_total_size(struct amdgpu_device *adev)
{
	const uint32_t last = SOC24_FIRMWARE_ID_MAX - 1;
	uint32_t bytes = 0;
	SOC24_FIRMWARE_ID fw_id;

	gfx_v12_0_parse_rlc_toc(adev, adev->psp.toc.start_addr);

	for (fw_id = SOC24_FIRMWARE_ID_RLC_G_UCODE;
	     fw_id < SOC24_FIRMWARE_ID_MAX; fw_id++)
		bytes += rlc_autoload_info[fw_id].size;

	/* In case the offset in rlc toc ucode is aligned */
	if (bytes < rlc_autoload_info[last].offset)
		bytes = rlc_autoload_info[last].offset +
			rlc_autoload_info[last].size;

	if (bytes < (RLC_TOC_UMF_SIZE_inM << 20))
		bytes = RLC_TOC_UMF_SIZE_inM << 20;

	return bytes;
}
/*
 * Create the VRAM buffer object that holds all firmware images for RLC
 * backdoor autoload.  The size comes from gfx_v12_0_calc_toc_total_size()
 * and the requested alignment is 64 KiB.
 *
 * Returns 0 on success or the error from amdgpu_bo_create_reserved().
 */
static int gfx_v12_0_rlc_autoload_buffer_init(struct amdgpu_device *adev)
{
	uint32_t bo_size = gfx_v12_0_calc_toc_total_size(adev);
	int ret;

	ret = amdgpu_bo_create_reserved(adev, bo_size, 64 * 1024,
					AMDGPU_GEM_DOMAIN_VRAM,
					&adev->gfx.rlc.rlc_autoload_bo,
					&adev->gfx.rlc.rlc_autoload_gpu_addr,
					(void **)&adev->gfx.rlc.rlc_autoload_ptr);
	if (ret)
		dev_err(adev->dev, "(%d) failed to create fw autoload bo\n", ret);

	return ret;
}
/*
 * Copy one firmware image into its slot in the RLC autoload buffer.
 *
 * The slot (offset and size) for @id comes from rlc_autoload_info[].  An
 * @id outside (SOC24_FIRMWARE_ID_INVALID, SOC24_FIRMWARE_ID_MAX) is
 * ignored.  A @fw_size of 0, or one larger than the slot, is replaced by
 * the slot size; when fewer bytes than the slot size are copied the rest
 * of the slot is zero-filled.
 */
static void gfx_v12_0_rlc_backdoor_autoload_copy_ucode(struct amdgpu_device *adev,
						       SOC24_FIRMWARE_ID id,
						       const void *fw_data,
						       uint32_t fw_size)
{
	uint32_t slot_size;
	char *slot;

	if (id <= SOC24_FIRMWARE_ID_INVALID || id >= SOC24_FIRMWARE_ID_MAX)
		return;

	slot = (char *)adev->gfx.rlc.rlc_autoload_ptr +
	       rlc_autoload_info[id].offset;
	slot_size = rlc_autoload_info[id].size;

	if (fw_size == 0 || fw_size > slot_size)
		fw_size = slot_size;

	memcpy(slot, fw_data, fw_size);

	/* pad a short image so the whole slot has defined contents */
	if (fw_size < slot_size)
		memset(slot + fw_size, 0, slot_size - fw_size);
}
/*
 * Patch the PSP TOC image and copy it into the autoload buffer.
 *
 * The second-to-last dword of the TOC (as sized by the RLC_TOC entry of
 * rlc_autoload_info[]) is overwritten in place with
 * (RLC_TOC_FORMAT_API << 24) | 0x1 before the image is copied into the
 * SOC24_FIRMWARE_ID_RLC_TOC slot.
 */
static void
gfx_v12_0_rlc_backdoor_autoload_copy_toc_ucode(struct amdgpu_device *adev)
{
	void *toc = adev->psp.toc.start_addr;
	uint32_t toc_bytes = rlc_autoload_info[SOC24_FIRMWARE_ID_RLC_TOC].size;
	uint32_t *tag = (uint32_t *)toc + toc_bytes / 4 - 2;

	*tag = (RLC_TOC_FORMAT_API << 24) | 0x1;

	gfx_v12_0_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RLC_TOC,
						   toc, toc_bytes);
}
static void
gfx_v12_0_rlc_backdoor_autoload_copy_gfx_ucode(struct amdgpu_device *adev)
{
const __le32 *fw_data;
uint32_t fw_size;
const struct gfx_firmware_header_v2_0 *cpv2_hdr;
const struct rlc_firmware_header_v2_0 *rlc_hdr;
const struct rlc_firmware_header_v2_1 *rlcv21_hdr;
const struct rlc_firmware_header_v2_2 *rlcv22_hdr;
uint16_t version_major, version_minor;
/* pfp ucode */
cpv2_hdr = (const struct gfx_firmware_header_v2_0 *)
adev->gfx.pfp_fw->data;
/* instruction */
fw_data = (const __le32 *)(adev->gfx.pfp_fw->data +
le32_to_cpu(cpv2_hdr->ucode_offset_bytes));
fw_size = le32_to_cpu(cpv2_hdr->ucode_size_bytes);
gfx_v12_0_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RS64_PFP,
fw_data, fw_size);
/* data */
fw_data = (const __le32 *)(adev->gfx.pfp_fw->data +
le32_to_cpu(cpv2_hdr->data_offset_bytes));
fw_size = le32_to_cpu(cpv2_hdr->data_size_bytes);
gfx_v12_0_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RS64_PFP_P0_STACK,
fw_data, fw_size);
gfx_v12_0_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RS64_PFP_P1_STACK,
fw_data, fw_size);
/* me ucode */
cpv2_hdr = (const struct gfx_firmware_header_v2_0 *)
adev->gfx.me_fw->data;
/* instruction */
fw_data = (const __le32 *)(adev->gfx.me_fw->data +
le32_to_cpu(cpv2_hdr->ucode_offset_bytes));
fw_size = le32_to_cpu(cpv2_hdr->ucode_size_bytes);
gfx_v12_0_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RS64_ME,
fw_data, fw_size);
/* data */
fw_data = (const __le32 *)(adev->gfx.me_fw->data +
le32_to_cpu(cpv2_hdr->data_offset_bytes));
fw_size = le32_to_cpu(cpv2_hdr->data_size_bytes);
gfx_v12_0_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RS64_ME_P0_STACK,
fw_data, fw_size);
gfx_v12_0_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RS64_ME_P1_STACK,
fw_data, fw_size);
/* mec ucode */
cpv2_hdr = (const struct gfx_firmware_header_v2_0 *)
adev->gfx.mec_fw->data;
/* instruction */
fw_data = (const __le32 *) (adev->gfx.mec_fw->data +
le32_to_cpu(cpv2_hdr->ucode_offset_bytes));
fw_size = le32_to_cpu(cpv2_hdr->ucode_size_bytes);
gfx_v12_0_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RS64_MEC,
fw_data, fw_size);
/* data */
fw_data = (const __le32 *) (adev->gfx.mec_fw->data +
le32_to_cpu(cpv2_hdr->data_offset_bytes));
fw_size = le32_to_cpu(cpv2_hdr->data_size_bytes);
gfx_v12_0_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RS64_MEC_P0_STACK,
fw_data, fw_size);
gfx_v12_0_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RS64_MEC_P1_STACK,
fw_data, fw_size);
gfx_v12_0_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RS64_MEC_P2_STACK,
fw_data, fw_size);
gfx_v12_0_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RS64_MEC_P3_STACK,
fw_data, fw_size);
/* rlc ucode */
rlc_hdr = (const struct rlc_firmware_header_v2_0 *)
adev->gfx.rlc_fw->data;
fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
le32_to_cpu(rlc_hdr->header.ucode_array_offset_bytes));
fw_size = le32_to_cpu(rlc_hdr->header.ucode_size_bytes);
gfx_v12_0_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RLC_G_UCODE,
fw_data, fw_size);
version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
if (version_major == 2) {
if (version_minor >= 1) {
rlcv21_hdr = (const struct rlc_firmware_header_v2_1 *)adev->gfx.rlc_fw->data;
fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
le32_to_cpu(rlcv21_hdr->save_restore_list_gpm_offset_bytes));
fw_size = le32_to_cpu(rlcv21_hdr->save_restore_list_gpm_size_bytes);
gfx_v12_0_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RLCG_SCRATCH,
fw_data, fw_size);
fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
le32_to_cpu(rlcv21_hdr->save_restore_list_srm_offset_bytes));
fw_size = le32_to_cpu(rlcv21_hdr->save_restore_list_srm_size_bytes);
gfx_v12_0_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RLC_SRM_ARAM,
fw_data, fw_size);
}
if (version_minor >= 2) {
rlcv22_hdr = (const struct rlc_firmware_header_v2_2 *)adev->gfx.rlc_fw->data;
fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
le32_to_cpu(rlcv22_hdr->rlc_iram_ucode_offset_bytes));
fw_size = le32_to_cpu(rlcv22_hdr->rlc_iram_ucode_size_bytes);
gfx_v12_0_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RLX6_UCODE,
fw_data, fw_size);
fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
le32_to_cpu(rlcv22_hdr->rlc_dram_ucode_offset_bytes));
fw_size = le32_to_cpu(rlcv22_hdr->rlc_dram_ucode_size_bytes);
gfx_v12_0_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RLX6_DRAM_BOOT,
fw_data, fw_size);
}
}
}
/*
 * Copy the ucode of SDMA instance 0 into the SDMA_UCODE_TH0 slot of the
 * RLC autoload buffer.  Offset and size are taken from the v3.0 SDMA
 * firmware header.
 */
static void
gfx_v12_0_rlc_backdoor_autoload_copy_sdma_ucode(struct amdgpu_device *adev)
{
	const struct sdma_firmware_header_v3_0 *hdr;
	const __le32 *src;
	uint32_t len;

	hdr = (const struct sdma_firmware_header_v3_0 *)
		adev->sdma.instance[0].fw->data;

	src = (const __le32 *)(adev->sdma.instance[0].fw->data +
			le32_to_cpu(hdr->ucode_offset_bytes));
	len = le32_to_cpu(hdr->ucode_size_bytes);

	gfx_v12_0_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_SDMA_UCODE_TH0,
						   src, len);
}
/*
 * Copy the MES firmware for pipes 0 and 1 into the RLC autoload buffer.
 *
 * For each pipe the ucode image goes into the RS64_MES_P<n> slot and the
 * ucode data image into the matching RS64_MES_P<n>_STACK slot.
 */
static void
gfx_v12_0_rlc_backdoor_autoload_copy_mes_ucode(struct amdgpu_device *adev)
{
	const struct mes_firmware_header_v1_0 *hdr;
	const __le32 *src;
	unsigned int len;
	int pipe, ucode_id, data_id;

	for (pipe = 0; pipe < 2; pipe++) {
		ucode_id = pipe ? SOC24_FIRMWARE_ID_RS64_MES_P1 :
				  SOC24_FIRMWARE_ID_RS64_MES_P0;
		data_id = pipe ? SOC24_FIRMWARE_ID_RS64_MES_P1_STACK :
				 SOC24_FIRMWARE_ID_RS64_MES_P0_STACK;

		hdr = (const struct mes_firmware_header_v1_0 *)
			adev->mes.fw[pipe]->data;

		/* instruction image */
		src = (const __le32 *)(adev->mes.fw[pipe]->data +
				le32_to_cpu(hdr->mes_ucode_offset_bytes));
		len = le32_to_cpu(hdr->mes_ucode_size_bytes);
		gfx_v12_0_rlc_backdoor_autoload_copy_ucode(adev, ucode_id, src, len);

		/* data image */
		src = (const __le32 *)(adev->mes.fw[pipe]->data +
				le32_to_cpu(hdr->mes_ucode_data_offset_bytes));
		len = le32_to_cpu(hdr->mes_ucode_data_size_bytes);
		gfx_v12_0_rlc_backdoor_autoload_copy_ucode(adev, data_id, src, len);
	}
}
static int gfx_v12_0_rlc_backdoor_autoload_enable(struct amdgpu_device *adev)
{
uint32_t rlc_g_offset, rlc_g_size;
uint64_t gpu_addr;
uint32_t data;
/* RLC autoload sequence 2: copy ucode */
gfx_v12_0_rlc_backdoor_autoload_copy_sdma_ucode(adev);
gfx_v12_0_rlc_backdoor_autoload_copy_gfx_ucode(adev);
gfx_v12_0_rlc_backdoor_autoload_copy_mes_ucode(adev);
gfx_v12_0_rlc_backdoor_autoload_copy_toc_ucode(adev);
rlc_g_offset = rlc_autoload_info[SOC24_FIRMWARE_ID_RLC_G_UCODE].offset;
rlc_g_size = rlc_autoload_info[SOC24_FIRMWARE_ID_RLC_G_UCODE].size;
gpu_addr = adev->gfx.rlc.rlc_autoload_gpu_addr + rlc_g_offset - adev->gmc.vram_start;
WREG32_SOC15(GC, 0, regGFX_IMU_RLC_BOOTLOADER_ADDR_HI, upper_32_bits(gpu_addr));
WREG32_SOC15(GC, 0, regGFX_IMU_RLC_BOOTLOADER_ADDR_LO, lower_32_bits(gpu_addr));
WREG32_SOC15(GC, 0, regGFX_IMU_RLC_BOOTLOADER_SIZE, rlc_g_size);
if (adev->gfx.imu.funcs && (amdgpu_dpm > 0)) {
/* RLC autoload sequence 3: load IMU fw */
if (adev->gfx.imu.funcs->load_microcode)
adev->gfx.imu.funcs->load_microcode(adev);
/* RLC autoload sequence 4 init IMU fw */
if (adev->gfx.imu.funcs->setup_imu)
adev->gfx.imu.funcs->setup_imu(adev);
if (adev->gfx.imu.funcs->start_imu)
adev->gfx.imu.funcs->start_imu(adev);
/* RLC autoload sequence 5 disable gpa mode */
gfx_v12_0_disable_gpa_mode(adev);
} else {
/* unhalt rlc to start autoload without imu */
data = RREG32_SOC15(GC, 0, regRLC_GPM_THREAD_ENABLE);
data = REG_SET_FIELD(data, RLC_GPM_THREAD_ENABLE, THREAD0_ENABLE, 1);
data = REG_SET_FIELD(data, RLC_GPM_THREAD_ENABLE, THREAD1_ENABLE, 1);
WREG32_SOC15(GC, 0, regRLC_GPM_THREAD_ENABLE, data);
WREG32_SOC15(GC, 0, regRLC_CNTL, RLC_CNTL__RLC_ENABLE_F32_MASK);
}
return 0;
}
/*
 * Allocate the zero-initialised buffers used for IP register dumps:
 * one for the core GC register list, one for the compute-queue register
 * list (per MEC * pipe * queue) and one for the gfx-queue register list
 * (per ME * pipe * queue).
 *
 * Allocation failures are logged and leave the corresponding
 * adev->gfx.ip_dump_* pointer NULL; they are not reported to the caller.
 */
static void gfx_v12_0_alloc_ip_dump(struct amdgpu_device *adev)
{
	uint32_t nregs;
	uint32_t ninst;
	uint32_t *buf;

	/* core GC registers */
	nregs = ARRAY_SIZE(gc_reg_list_12_0);
	buf = kcalloc(nregs, sizeof(uint32_t), GFP_KERNEL);
	if (!buf)
		DRM_ERROR("Failed to allocate memory for GFX IP Dump\n");
	adev->gfx.ip_dump_core = buf;

	/* compute queue registers, one set per queue instance */
	nregs = ARRAY_SIZE(gc_cp_reg_list_12);
	ninst = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe_per_mec *
		adev->gfx.mec.num_queue_per_pipe;
	buf = kcalloc(nregs * ninst, sizeof(uint32_t), GFP_KERNEL);
	if (!buf)
		DRM_ERROR("Failed to allocate memory for Compute Queues IP Dump\n");
	adev->gfx.ip_dump_compute_queues = buf;

	/* gfx queue registers, one set per queue instance */
	nregs = ARRAY_SIZE(gc_gfx_queue_reg_list_12);
	ninst = adev->gfx.me.num_me * adev->gfx.me.num_pipe_per_me *
		adev->gfx.me.num_queue_per_pipe;
	buf = kcalloc(nregs * ninst, sizeof(uint32_t), GFP_KERNEL);
	if (!buf)
		DRM_ERROR("Failed to allocate memory for GFX Queues IP Dump\n");
	adev->gfx.ip_dump_gfx_queues = buf;
}
/*
 * Software-side initialisation of the GFX v12 IP block.
 *
 * In order: sets the ME/MEC topology for the detected GC IP version,
 * enables optional features gated on firmware versions (MES user queues,
 * cleaner shader), caps the number of compute rings, registers the CP
 * interrupt sources, initialises ME/RLC/MEC state, creates the gfx and
 * compute rings, records the supported reset types, sets up KIQ (unless
 * MES KIQ is enabled), the MQDs, the RLC autoload buffer (backdoor
 * autoload only), the early GPU config, the IP dump buffers and sysfs.
 *
 * Returns 0 on success or the first error encountered.
 * NOTE(review): error returns in this function do not undo earlier steps;
 * presumably the caller is responsible for cleanup - confirm.
 */
static int gfx_v12_0_sw_init(struct amdgpu_ip_block *ip_block)
{
int i, j, k, r, ring_id = 0;
unsigned num_compute_rings;
int xcc_id = 0;
struct amdgpu_device *adev = ip_block->adev;
int num_queue_per_pipe = 1; /* we only enable 1 KGQ per pipe */
INIT_DELAYED_WORK(&adev->gfx.idle_work, amdgpu_gfx_profile_idle_work_handler);
/* ME/MEC counts, pipes per engine and queues per pipe for this GC version */
switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
case IP_VERSION(12, 0, 0):
case IP_VERSION(12, 0, 1):
adev->gfx.me.num_me = 1;
adev->gfx.me.num_pipe_per_me = 1;
adev->gfx.me.num_queue_per_pipe = 8;
adev->gfx.mec.num_mec = 1;
adev->gfx.mec.num_pipe_per_mec = 2;
adev->gfx.mec.num_queue_per_pipe = 4;
break ;
default :
adev->gfx.me.num_me = 1;
adev->gfx.me.num_pipe_per_me = 1;
adev->gfx.me.num_queue_per_pipe = 1;
adev->gfx.mec.num_mec = 1;
adev->gfx.mec.num_pipe_per_mec = 4;
adev->gfx.mec.num_queue_per_pipe = 8;
break ;
}
/*
 * Install the MES user-queue callbacks for GFX and COMPUTE only when user
 * queues are not disabled and ME/PFP/MEC/MES firmware meet the minimum
 * versions below.
 */
switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
case IP_VERSION(12, 0, 0):
case IP_VERSION(12, 0, 1):
if (!adev->gfx.disable_uq &&
adev->gfx.me_fw_version >= 2780 &&
adev->gfx.pfp_fw_version >= 2840 &&
adev->gfx.mec_fw_version >= 3050 &&
adev->mes.fw_version[0] >= 123) {
adev->userq_funcs[AMDGPU_HW_IP_GFX] = &userq_mes_funcs;
adev->userq_funcs[AMDGPU_HW_IP_COMPUTE] = &userq_mes_funcs;
}
break ;
default :
break ;
}
/*
 * Cleaner shader: enabled for 12.0.0/12.0.1 when the firmware versions are
 * recent enough, explicitly disabled for any other GC version.
 */
switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
case IP_VERSION(12, 0, 0):
case IP_VERSION(12, 0, 1):
if (adev->gfx.me_fw_version >= 2480 &&
adev->gfx.pfp_fw_version >= 2530 &&
adev->gfx.mec_fw_version >= 2680 &&
adev->mes.fw_version[0] >= 100)
adev->gfx.enable_cleaner_shader = true ;
break ;
default :
adev->gfx.enable_cleaner_shader = false ;
break ;
}
if (adev->gfx.num_compute_rings) {
/* recalculate compute rings to use based on hardware configuration */
/* at most half of (pipes per MEC * queues per pipe) are used as kernel rings */
num_compute_rings = (adev->gfx.mec.num_pipe_per_mec *
adev->gfx.mec.num_queue_per_pipe) / 2;
adev->gfx.num_compute_rings = min(adev->gfx.num_compute_rings,
num_compute_rings);
}
/* EOP Event */
r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP,
GFX_12_0_0__SRCID__CP_EOP_INTERRUPT,
&adev->gfx.eop_irq);
if (r)
return r;
/* Bad opcode Event */
r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP,
GFX_12_0_0__SRCID__CP_BAD_OPCODE_ERROR,
&adev->gfx.bad_op_irq);
if (r)
return r;
/* Privileged reg */
r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP,
GFX_12_0_0__SRCID__CP_PRIV_REG_FAULT,
&adev->gfx.priv_reg_irq);
if (r)
return r;
/* Privileged inst */
r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP,
GFX_12_0_0__SRCID__CP_PRIV_INSTR_FAULT,
&adev->gfx.priv_inst_irq);
if (r)
return r;
adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
gfx_v12_0_me_init(adev);
r = gfx_v12_0_rlc_init(adev);
if (r) {
dev_err(adev->dev, "Failed to init rlc BOs!\n" );
return r;
}
r = gfx_v12_0_mec_init(adev);
if (r) {
dev_err(adev->dev, "Failed to init MEC BOs!\n" );
return r;
}
if (adev->gfx.num_gfx_rings) {
/* set up the gfx ring */
/*
 * The queue loop is bounded by the local num_queue_per_pipe (1), not by
 * adev->gfx.me.num_queue_per_pipe; queues are the middle loop and pipes
 * the inner loop.
 */
for (i = 0; i < adev->gfx.me.num_me; i++) {
for (j = 0; j < num_queue_per_pipe; j++) {
for (k = 0; k < adev->gfx.me.num_pipe_per_me; k++) {
if (!amdgpu_gfx_is_me_queue_enabled(adev, i, k, j))
continue ;
r = gfx_v12_0_gfx_ring_init(adev, ring_id,
i, k, j);
if (r)
return r;
ring_id++;
}
}
}
}
if (adev->gfx.num_compute_rings) {
/* ring ids restart at 0 for the compute ring array */
ring_id = 0;
/* set up the compute queues - allocate horizontally across pipes */
for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
if (!amdgpu_gfx_is_mec_queue_enabled(adev,
0, i, k, j))
continue ;
r = gfx_v12_0_compute_ring_init(adev, ring_id,
i, k, j);
if (r)
return r;
ring_id++;
}
}
}
}
/* baseline reset capabilities are derived from the first gfx/compute ring */
adev->gfx.gfx_supported_reset =
amdgpu_get_soft_full_reset_mask(&adev->gfx.gfx_ring[0]);
adev->gfx.compute_supported_reset =
amdgpu_get_soft_full_reset_mask(&adev->gfx.compute_ring[0]);
/* per-queue reset additionally requires minimum ME/MEC firmware and a non-SR-IOV-VF device */
switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
case IP_VERSION(12, 0, 0):
case IP_VERSION(12, 0, 1):
if ((adev->gfx.me_fw_version >= 2660) &&
(adev->gfx.mec_fw_version >= 2920) &&
!amdgpu_sriov_vf(adev)) {
adev->gfx.compute_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
adev->gfx.gfx_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
}
break ;
default :
break ;
}
/* the driver-side KIQ is only set up when MES does not provide the KIQ */
if (!adev->enable_mes_kiq) {
r = amdgpu_gfx_kiq_init(adev, GFX12_MEC_HPD_SIZE, 0);
if (r) {
dev_err(adev->dev, "Failed to init KIQ BOs!\n" );
return r;
}
r = amdgpu_gfx_kiq_init_ring(adev, xcc_id);
if (r)
return r;
}
r = amdgpu_gfx_mqd_sw_init(adev, sizeof (struct v12_compute_mqd), 0);
if (r)
return r;
/* allocate visible FB for rlc auto-loading fw */
if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) {
r = gfx_v12_0_rlc_autoload_buffer_init(adev);
if (r)
return r;
}
r = gfx_v12_0_gpu_early_init(adev);
if (r)
return r;
/* IP dump buffers are best-effort: allocation failure is not an error here */
gfx_v12_0_alloc_ip_dump(adev);
r = amdgpu_gfx_sysfs_init(adev);
if (r)
return r;
return 0;
}
/*
 * Release the PFP firmware buffer objects: first the instruction image
 * (pfp_fw_obj), then the data image (pfp_fw_data_obj), together with their
 * GPU addresses and CPU pointers.
 */
static void gfx_v12_0_pfp_fini(struct amdgpu_device *adev)
{
amdgpu_bo_free_kernel(&adev->gfx.pfp.pfp_fw_obj,
&adev->gfx.pfp.pfp_fw_gpu_addr,
(void **)&adev->gfx.pfp.pfp_fw_ptr);
amdgpu_bo_free_kernel(&adev->gfx.pfp.pfp_fw_data_obj,
&adev->gfx.pfp.pfp_fw_data_gpu_addr,
(void **)&adev->gfx.pfp.pfp_fw_data_ptr);
}
/*
 * Release the ME firmware buffer objects: first the instruction image
 * (me_fw_obj), then the data image (me_fw_data_obj), together with their
 * GPU addresses and CPU pointers.
 */
static void gfx_v12_0_me_fini(struct amdgpu_device *adev)
{
amdgpu_bo_free_kernel(&adev->gfx.me.me_fw_obj,
&adev->gfx.me.me_fw_gpu_addr,
(void **)&adev->gfx.me.me_fw_ptr);
amdgpu_bo_free_kernel(&adev->gfx.me.me_fw_data_obj,
&adev->gfx.me.me_fw_data_gpu_addr,
(void **)&adev->gfx.me.me_fw_data_ptr);
}
/*
 * Release the RLC autoload buffer object together with its GPU address and
 * CPU pointer (counterpart of gfx_v12_0_rlc_autoload_buffer_init()).
 */
static void gfx_v12_0_rlc_autoload_buffer_fini(struct amdgpu_device *adev)
{
amdgpu_bo_free_kernel(&adev->gfx.rlc.rlc_autoload_bo,
&adev->gfx.rlc.rlc_autoload_gpu_addr,
(void **)&adev->gfx.rlc.rlc_autoload_ptr);
}
/*
 * Software-side teardown of the GFX v12 IP block.
 *
 * Tears down the gfx and compute rings, the MQDs, the KIQ (when MES KIQ is
 * not in use), the PFP/ME/RLC/MEC state, the RLC autoload buffer (backdoor
 * autoload only), the loaded microcode, the sysfs entries and the IP dump
 * buffers.
 *
 * Always returns 0.
 */
static int gfx_v12_0_sw_fini(struct amdgpu_ip_block *ip_block)
{
	struct amdgpu_device *adev = ip_block->adev;
	int idx;

	for (idx = 0; idx < adev->gfx.num_gfx_rings; idx++) {
		amdgpu_ring_fini(&adev->gfx.gfx_ring[idx]);
	}

	for (idx = 0; idx < adev->gfx.num_compute_rings; idx++) {
		amdgpu_ring_fini(&adev->gfx.compute_ring[idx]);
	}

	amdgpu_gfx_mqd_sw_fini(adev, 0);

	if (!adev->enable_mes_kiq) {
		amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq[0].ring);
		amdgpu_gfx_kiq_fini(adev, 0);
	}

	gfx_v12_0_pfp_fini(adev);
	gfx_v12_0_me_fini(adev);
	gfx_v12_0_rlc_fini(adev);
	gfx_v12_0_mec_fini(adev);

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) {
		gfx_v12_0_rlc_autoload_buffer_fini(adev);
	}

	gfx_v12_0_free_microcode(adev);
	amdgpu_gfx_sysfs_fini(adev);

	/* kfree() accepts NULL, so failed IP dump allocations need no special casing */
	kfree(adev->gfx.ip_dump_core);
	kfree(adev->gfx.ip_dump_compute_queues);
	kfree(adev->gfx.ip_dump_gfx_queues);

	return 0;
}
/*
 * Program GRBM_GFX_INDEX to target a specific shader engine (@se_num),
 * shader array (@sh_num) and instance (@instance).
 *
 * Passing 0xffffffff for any of the three selects broadcast writes for
 * that level instead of a specific index.  @xcc_id is not used here.
 */
static void gfx_v12_0_select_se_sh(struct amdgpu_device *adev, u32 se_num,
				   u32 sh_num, u32 instance, int xcc_id)
{
	u32 index = 0;

	if (instance != 0xffffffff)
		index = REG_SET_FIELD(index, GRBM_GFX_INDEX, INSTANCE_INDEX,
				      instance);
	else
		index = REG_SET_FIELD(index, GRBM_GFX_INDEX,
				      INSTANCE_BROADCAST_WRITES, 1);

	if (se_num != 0xffffffff)
		index = REG_SET_FIELD(index, GRBM_GFX_INDEX, SE_INDEX, se_num);
	else
		index = REG_SET_FIELD(index, GRBM_GFX_INDEX,
				      SE_BROADCAST_WRITES, 1);

	if (sh_num != 0xffffffff)
		index = REG_SET_FIELD(index, GRBM_GFX_INDEX, SA_INDEX, sh_num);
	else
		index = REG_SET_FIELD(index, GRBM_GFX_INDEX,
				      SA_BROADCAST_WRITES, 1);

	WREG32_SOC15(GC, 0, regGRBM_GFX_INDEX, index);
}
/*
 * Return a bitmap of shader arrays that are not disabled.
 *
 * The SA_DISABLE fields of GRBM_CC_GC_SA_UNIT_DISABLE and
 * GRBM_GC_USER_SA_UNIT_DISABLE are OR-ed together and removed from a mask
 * with max_sh_per_se * max_shader_engines bits.
 */
static u32 gfx_v12_0_get_sa_active_bitmap(struct amdgpu_device *adev)
{
	u32 raw, cc_disabled, user_disabled, all_sa;

	raw = RREG32_SOC15(GC, 0, regGRBM_CC_GC_SA_UNIT_DISABLE);
	cc_disabled = REG_GET_FIELD(raw, GRBM_CC_GC_SA_UNIT_DISABLE,
				    SA_DISABLE);

	raw = RREG32_SOC15(GC, 0, regGRBM_GC_USER_SA_UNIT_DISABLE);
	user_disabled = REG_GET_FIELD(raw, GRBM_GC_USER_SA_UNIT_DISABLE,
				      SA_DISABLE);

	all_sa = amdgpu_gfx_create_bitmask(adev->gfx.config.max_sh_per_se *
					   adev->gfx.config.max_shader_engines);

	return all_sa & (~(cc_disabled | user_disabled));
}
/*
 * Return a bitmap of render backends that are not disabled.
 *
 * The BACKEND_DISABLE fields of CC_RB_BACKEND_DISABLE and
 * GC_USER_RB_BACKEND_DISABLE are OR-ed together and removed from a mask
 * with max_backends_per_se * max_shader_engines bits.
 */
static u32 gfx_v12_0_get_rb_active_bitmap(struct amdgpu_device *adev)
{
	u32 raw, cc_disabled, user_disabled, all_rb;

	raw = RREG32_SOC15(GC, 0, regCC_RB_BACKEND_DISABLE);
	cc_disabled = REG_GET_FIELD(raw, CC_RB_BACKEND_DISABLE,
				    BACKEND_DISABLE);

	raw = RREG32_SOC15(GC, 0, regGC_USER_RB_BACKEND_DISABLE);
	user_disabled = REG_GET_FIELD(raw, GC_USER_RB_BACKEND_DISABLE,
				      BACKEND_DISABLE);

	all_rb = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se *
					   adev->gfx.config.max_shader_engines);

	return all_rb & (~(cc_disabled | user_disabled));
}
/*
 * Compute the active render-backend mask and count and store them in
 * adev->gfx.config (backend_enable_mask, num_rbs).
 *
 * Each active shader array contributes a group of
 * max_backends_per_se / max_sh_per_se RB bits at its position; the result
 * is then intersected with the RBs that are not disabled.
 */
static void gfx_v12_0_setup_rb(struct amdgpu_device *adev)
{
	u32 sa_active, rb_global_active;
	u32 sa_count, rbs_per_sa, rb_group_mask;
	u32 rb_active = 0;
	u32 sa;

	/* query sa bitmap from SA_UNIT_DISABLE registers */
	sa_active = gfx_v12_0_get_sa_active_bitmap(adev);
	/* query rb bitmap from RB_BACKEND_DISABLE registers */
	rb_global_active = gfx_v12_0_get_rb_active_bitmap(adev);

	/* generate active rb bitmap according to active sa bitmap */
	sa_count = adev->gfx.config.max_shader_engines *
		   adev->gfx.config.max_sh_per_se;
	rbs_per_sa = adev->gfx.config.max_backends_per_se /
		     adev->gfx.config.max_sh_per_se;
	rb_group_mask = amdgpu_gfx_create_bitmask(rbs_per_sa);

	for (sa = 0; sa < sa_count; sa++) {
		if (!(sa_active & (1 << sa)))
			continue;
		rb_active |= (rb_group_mask << (sa * rbs_per_sa));
	}

	rb_active &= rb_global_active;

	adev->gfx.config.backend_enable_mask = rb_active;
	adev->gfx.config.num_rbs = hweight32(rb_active);
}
/* Aperture base values packed into SH_MEM_BASES for the KFD VMIDs. */
#define LDS_APP_BASE     0x1
#define SCRATCH_APP_BASE 0x2

/*
 * Program the memory apertures and enable traps for every KFD VMID, i.e.
 * every VMID from adev->vm_manager.first_kfd_vmid up to AMDGPU_NUM_VMID.
 *
 * The whole sequence runs under adev->srbm_mutex, and the GRBM selection
 * is reset to VMID 0 before the mutex is released.
 */
static void gfx_v12_0_init_compute_vmid(struct amdgpu_device *adev)
{
	uint32_t bases;
	uint32_t gdbg_cntl;
	int vmid;

	/*
	 * Configure apertures:
	 * LDS:     0x60000000'00000000 - 0x60000001'00000000 (4GB)
	 * Scratch: 0x60000001'00000000 - 0x60000002'00000000 (4GB)
	 * GPUVM:   0x60010000'00000000 - 0x60020000'00000000 (1TB)
	 */
	bases = (LDS_APP_BASE << SH_MEM_BASES__SHARED_BASE__SHIFT) |
		SCRATCH_APP_BASE;

	mutex_lock(&adev->srbm_mutex);
	for (vmid = adev->vm_manager.first_kfd_vmid; vmid < AMDGPU_NUM_VMID; vmid++) {
		soc24_grbm_select(adev, 0, 0, 0, vmid);

		/* CP and shaders */
		WREG32_SOC15(GC, 0, regSH_MEM_CONFIG, DEFAULT_SH_MEM_CONFIG);
		WREG32_SOC15(GC, 0, regSH_MEM_BASES, bases);

		/* Enable trap for each kfd vmid. */
		gdbg_cntl = RREG32_SOC15(GC, 0, regSPI_GDBG_PER_VMID_CNTL);
		gdbg_cntl = REG_SET_FIELD(gdbg_cntl, SPI_GDBG_PER_VMID_CNTL, TRAP_EN, 1);
		WREG32_SOC15(GC, 0, regSPI_GDBG_PER_VMID_CNTL, gdbg_cntl);
	}
	soc24_grbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);
}
/* Placeholder: currently does nothing. */
static void gfx_v12_0_tcp_harvest(struct amdgpu_device *adev)
{
/* TODO: harvest feature to be added later. */
}
/* Intentionally empty: no TCC information is collected here. */
static void gfx_v12_0_get_tcc_info(struct amdgpu_device *adev)
{
}
/*
 * One-time programming of gfx constants.
 *
 * Sets the GRBM read timeout (skipped on SR-IOV VFs), gathers RB/CU/TCC
 * information, then programs SH_MEM_CONFIG for every GFXHUB VMID and
 * SH_MEM_BASES for every VMID except 0, using bits 63:48 of the private
 * and shared aperture start addresses.  Finally the KFD VMIDs are set up
 * via gfx_v12_0_init_compute_vmid().
 */
static void gfx_v12_0_constants_init(struct amdgpu_device *adev)
{
	u32 bases;
	int vmid;

	if (!amdgpu_sriov_vf(adev))
		WREG32_FIELD15_PREREG(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff);

	gfx_v12_0_setup_rb(adev);
	gfx_v12_0_get_cu_info(adev, &adev->gfx.cu_info);
	gfx_v12_0_get_tcc_info(adev);
	adev->gfx.config.pa_sc_tile_steering_override = 0;

	/* XXX SH_MEM regs */
	/* where to put LDS, scratch, GPUVM in FSA64 space */
	mutex_lock(&adev->srbm_mutex);
	for (vmid = 0; vmid < adev->vm_manager.id_mgr[AMDGPU_GFXHUB(0)].num_ids; vmid++) {
		soc24_grbm_select(adev, 0, 0, 0, vmid);

		/* CP and shaders */
		WREG32_SOC15(GC, 0, regSH_MEM_CONFIG, DEFAULT_SH_MEM_CONFIG);

		/* VMID 0 keeps its SH_MEM_BASES untouched */
		if (vmid == 0)
			continue;

		bases = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE,
				      (adev->gmc.private_aperture_start >> 48));
		bases = REG_SET_FIELD(bases, SH_MEM_BASES, SHARED_BASE,
				      (adev->gmc.shared_aperture_start >> 48));
		WREG32_SOC15(GC, 0, regSH_MEM_BASES, bases);
	}
	soc24_grbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);

	gfx_v12_0_init_compute_vmid(adev);
}
/*
 * Return the register offset of the CP interrupt control register for the
 * given gfx @me / @pipe, or 0 when there is none.  Only me 0, pipe 0
 * (CP_INT_CNTL_RING0) is handled.
 */
static u32 gfx_v12_0_get_cpg_int_cntl(struct amdgpu_device *adev,
				      int me, int pipe)
{
	if (me == 0 && pipe == 0)
		return SOC15_REG_OFFSET(GC, 0, regCP_INT_CNTL_RING0);

	return 0;
}
/*
 * Return the register offset of the compute interrupt control register for
 * the given @me / @pipe, or 0 when there is none.
 *
 * amdgpu controls only the first MEC (me 1), so only its pipes 0 and 1 are
 * handled here.  All other pipes' interrupts are set by amdkfd.
 */
static u32 gfx_v12_0_get_cpc_int_cntl(struct amdgpu_device *adev,
				      int me, int pipe)
{
	if (me != 1)
		return 0;

	if (pipe == 0)
		return SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE0_INT_CNTL);
	if (pipe == 1)
		return SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE1_INT_CNTL);

	return 0;
}
/*
 * Enable or disable the context busy/empty, CMP busy and GFX idle
 * interrupts on every gfx pipe that has a CP interrupt control register.
 * Does nothing on SR-IOV VFs.
 */
static void gfx_v12_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
						bool enable)
{
	u32 int_cntl, int_cntl_reg;
	u32 en = enable ? 1 : 0;
	int me, pipe;

	if (amdgpu_sriov_vf(adev))
		return;

	for (me = 0; me < adev->gfx.me.num_me; me++) {
		for (pipe = 0; pipe < adev->gfx.me.num_pipe_per_me; pipe++) {
			int_cntl_reg = gfx_v12_0_get_cpg_int_cntl(adev, me, pipe);
			/* offset 0 means this me/pipe has no control register */
			if (!int_cntl_reg)
				continue;

			int_cntl = RREG32_SOC15_IP(GC, int_cntl_reg);
			int_cntl = REG_SET_FIELD(int_cntl, CP_INT_CNTL_RING0,
						 CNTX_BUSY_INT_ENABLE, en);
			int_cntl = REG_SET_FIELD(int_cntl, CP_INT_CNTL_RING0,
						 CNTX_EMPTY_INT_ENABLE, en);
			int_cntl = REG_SET_FIELD(int_cntl, CP_INT_CNTL_RING0,
						 CMP_BUSY_INT_ENABLE, en);
			int_cntl = REG_SET_FIELD(int_cntl, CP_INT_CNTL_RING0,
						 GFX_IDLE_INT_ENABLE, en);
			WREG32_SOC15_IP(GC, int_cntl_reg, int_cntl);
		}
	}
}
static int gfx_v12_0_init_csb(struct amdgpu_device *adev)
{
adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr);
WREG32_SOC15(GC, 0, regRLC_CSIB_ADDR_HI,
adev->gfx.rlc.clear_state_gpu_addr >> 32);
WREG32_SOC15(GC, 0, regRLC_CSIB_ADDR_LO,
adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
WREG32_SOC15(GC, 0, regRLC_CSIB_LENGTH, adev->gfx.rlc.clear_state_size);
return 0;
}
/*
 * Stop the RLC by clearing RLC_ENABLE_F32 in RLC_CNTL (read-modify-write,
 * other bits are preserved).
 */
static void gfx_v12_0_rlc_stop(struct amdgpu_device *adev)
{
	u32 rlc_cntl;

	rlc_cntl = RREG32_SOC15(GC, 0, regRLC_CNTL);
	rlc_cntl = REG_SET_FIELD(rlc_cntl, RLC_CNTL, RLC_ENABLE_F32, 0);
	WREG32_SOC15(GC, 0, regRLC_CNTL, rlc_cntl);
}
/*
 * Pulse the RLC soft reset: set SOFT_RESET_RLC in GRBM_SOFT_RESET, wait
 * 50us, clear it again and wait another 50us.
 */
static void gfx_v12_0_rlc_reset(struct amdgpu_device *adev)
{
WREG32_FIELD15_PREREG(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
udelay(50);
WREG32_FIELD15_PREREG(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
udelay(50);
}
/*
 * Enable or disable the RLC <-> SMU handshake via the
 * SMU_HANDSHAKE_DISABLE bit of RLC_PG_CNTL.
 *
 * RLC_PG_CNTL[23] = 0 (default)
 *   RLC will wait for handshake acks with SMU
 *   GFXOFF will be enabled
 * RLC_PG_CNTL[23] = 1
 *   RLC will not issue any message to SMU
 *   hence no handshake between SMU & RLC
 *   GFXOFF will be disabled
 */
static void gfx_v12_0_rlc_smu_handshake_cntl(struct amdgpu_device *adev,
					     bool enable)
{
	uint32_t pg_cntl = RREG32_SOC15(GC, 0, regRLC_PG_CNTL);

	if (enable)
		pg_cntl &= ~RLC_PG_CNTL__SMU_HANDSHAKE_DISABLE_MASK;
	else
		pg_cntl |= RLC_PG_CNTL__SMU_HANDSHAKE_DISABLE_MASK;

	WREG32_SOC15(GC, 0, regRLC_PG_CNTL, pg_cntl);
}
static void gfx_v12_0_rlc_start(struct amdgpu_device *adev)
{
/* TODO: enable rlc & smu handshake until smu
* and gfxoff feature works as expected */
if (!(amdgpu_pp_feature_mask & PP_GFXOFF_MASK))
gfx_v12_0_rlc_smu_handshake_cntl(adev, false );
WREG32_FIELD15_PREREG(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1);
udelay(50);
}
static void gfx_v12_0_rlc_enable_srm(struct amdgpu_device *adev)
{
uint32_t tmp;
--> --------------------
--> maximum size reached
--> --------------------
Messung V0.5 C=81 H=95 G=88
¤ Dauer der Verarbeitung: 0.18 Sekunden
¤
*© Formatika GbR, Deutschland