/*
 * etnaviv_gpu_reset_deassert() - bring the GPU core out of reset.
 * @gpu: GPU whose reset line should be released
 *
 * Observes the required settling delays around the deassertion; at 1us
 * each delay covers all implementations without computing the actual
 * clock period.
 *
 * Returns 0 on success or the error from reset_control_deassert().
 */
static int etnaviv_gpu_reset_deassert(struct etnaviv_gpu *gpu)
{
	int ret;

	/*
	 * 32 core clock cycles (slowest clock) required before deassertion
	 * 1 microsecond might match all implementations without computation
	 */
	usleep_range(1, 2);

	ret = reset_control_deassert(gpu->rst);
	if (ret)
		return ret;

	/*
	 * 128 core clock cycles (slowest clock) required before any activity on AHB
	 * 1 microsecond might match all implementations without computation
	 */
	usleep_range(1, 2);

	return 0;
}
/*
 * NOTE(review): fragment of a GPU identification routine (fills in
 * gpu->identity.* from chip ID registers).  The enclosing function
 * header is not visible in this chunk, several lines have lost
 * whitespace ("elseif"), and the bare 'return;' below implies a void
 * enclosing function — confirm against the original file.
 */
/* Fill in the stream count if not specified */ if (gpu->identity.stream_count == 0) { if (gpu->identity.model >= 0x1000)
gpu->identity.stream_count = 4; else
gpu->identity.stream_count = 1;
}
/* Convert the register max value */ if (gpu->identity.register_max)
gpu->identity.register_max = 1 << gpu->identity.register_max; elseif (gpu->identity.model == chipModel_GC400)
gpu->identity.register_max = 32; else
gpu->identity.register_max = 64;
/* * Reading these two registers on GC600 rev 0x19 result in an * unhandled fault: external abort on non-linefetch
*/ if (!etnaviv_is_model_rev(gpu, 0x600, 0x19)) {
gpu->identity.product_id = gpu_read(gpu, VIVS_HI_CHIP_PRODUCT_ID);
gpu->identity.eco_id = gpu_read(gpu, VIVS_HI_CHIP_ECO_ID);
}
/* * !!!! HACK ALERT !!!! * Because people change device IDs without letting software * know about it - here is the hack to make it all look the * same. Only for GC400 family.
*/ if ((gpu->identity.model & 0xff00) == 0x0400 &&
gpu->identity.model != chipModel_GC420) {
gpu->identity.model = gpu->identity.model & 0x0400;
}
/* Another special case */ if (etnaviv_is_model_rev(gpu, 0x300, 0x2201)) {
u32 chipTime = gpu_read(gpu, VIVS_HI_CHIP_TIME);
/* NOTE(review): 'chipDate' is read below but not declared in this visible span — its read was likely lost with the missing function head. */
if (chipDate == 0x20080814 && chipTime == 0x12051100) { /* * This IP has an ECO; put the correct * revision in it.
*/
gpu->identity.revision = 0x1051;
}
}
/* * NXP likes to call the GPU on the i.MX6QP GC2000+, but in * reality it's just a re-branded GC3000. We can identify this * core by the upper half of the revision register being all 1. * Fix model/rev here, so all other places can refer to this * core by its real identity.
*/ if (etnaviv_is_model_rev(gpu, 0x2000, 0xffff5450)) {
gpu->identity.model = chipModel_GC3000;
gpu->identity.revision &= 0xffff;
}
/* Consider everything except the AXI low-power state bit when checking for idle. */
gpu->idle_mask = ~VIVS_HI_IDLE_STATE_AXI_LP; /* * If there is a match in the HWDB, we aren't interested in the * remaining register values, as they might be wrong.
*/ if (etnaviv_fill_identity_from_hwdb(gpu)) return;
/* Disable fast clear on GC700. */ if (gpu->identity.model == chipModel_GC700)
gpu->identity.features &= ~chipFeatures_FAST_CLEAR;
/* These models/revisions don't have the 2D pipe bit */ if ((gpu->identity.model == chipModel_GC500 &&
gpu->identity.revision <= 2) ||
gpu->identity.model == chipModel_GC300)
gpu->identity.features |= chipFeatures_PIPE_2D;
/* * Choose number of wait cycles to target a ~30us (1/32768) max latency * until new work is picked up by the FE when it polls in the idle loop. * If the GPU base frequency is unknown use 200 wait cycles.
*/
gpu->fe_waitcycles = clamp(gpu->base_rate_core >> (15 - gpu->freq_scale),
200UL, 0xffffUL);
}
/*
 * NOTE(review): fragment of a module clock-gating setup routine.
 * 'pmc' is not declared anywhere in this chunk — the function header
 * that reads VIVS_PM_MODULE_CONTROLS into it is missing; confirm
 * against the original file.
 */
/* Disable PA clock gating for GC400+ without bugfix except for GC420 */ if (gpu->identity.model >= chipModel_GC400 &&
gpu->identity.model != chipModel_GC420 &&
!(gpu->identity.minor_features3 & chipMinorFeatures3_BUG_FIXES12))
pmc |= VIVS_PM_MODULE_CONTROLS_DISABLE_MODULE_CLOCK_GATING_PA;
/* * Disable PE clock gating on revs < 5.0.0.0 when HZ is * present without a bug fix.
*/ if (gpu->identity.revision < 0x5000 &&
gpu->identity.minor_features0 & chipMinorFeatures0_HZ &&
!(gpu->identity.minor_features1 &
chipMinorFeatures1_DISABLE_PE_GATING))
pmc |= VIVS_PM_MODULE_CONTROLS_DISABLE_MODULE_CLOCK_GATING_PE;
if (gpu->identity.revision < 0x5422)
pmc |= BIT(15); /* Unknown bit */
/* Disable SE and RA clock gating on affected core revisions. */ if (etnaviv_is_model_rev(gpu, 0x7000, 0x6202))
pmc |= VIVS_PM_MODULE_CONTROLS_DISABLE_MODULE_CLOCK_GATING_SE |
VIVS_PM_MODULE_CONTROLS_DISABLE_MODULE_CLOCK_GATING_RA;
/*
 * NOTE(review): the header below names etnaviv_gpu_setup_pulse_eater(),
 * but the body looks like the tail of a hardware-init routine: it
 * programs AXI/bus configuration and then *calls*
 * etnaviv_gpu_setup_pulse_eater() near the end — unconditional
 * self-recursion as written.  'pulse_eater' is computed but never
 * written to any register, and "staticvoid" has lost its whitespace.
 * Two functions appear spliced together; confirm against the original
 * file before compiling.
 */
staticvoid etnaviv_gpu_setup_pulse_eater(struct etnaviv_gpu *gpu)
{ /* * Base value for VIVS_PM_PULSE_EATER register on models where it * cannot be read, extracted from vivante kernel driver.
*/
u32 pulse_eater = 0x01590880;
/* * Update GPU AXI cache attribute to "cacheable, no allocate". * This is necessary to prevent the iMX6 SoC locking up.
*/
gpu_write(gpu, VIVS_HI_AXI_CONFIG,
VIVS_HI_AXI_CONFIG_AWCACHE(2) |
VIVS_HI_AXI_CONFIG_ARCACHE(2));
/* GC2000 rev 5108 needs a special bus config */ if (etnaviv_is_model_rev(gpu, 0x2000, 0x5108)) {
u32 bus_config = gpu_read(gpu, VIVS_MC_BUS_CONFIG);
bus_config &= ~(VIVS_MC_BUS_CONFIG_FE_BUS_CONFIG__MASK |
VIVS_MC_BUS_CONFIG_TX_BUS_CONFIG__MASK);
bus_config |= VIVS_MC_BUS_CONFIG_FE_BUS_CONFIG(1) |
VIVS_MC_BUS_CONFIG_TX_BUS_CONFIG(0);
gpu_write(gpu, VIVS_MC_BUS_CONFIG, bus_config);
}
/* Allow non-secure register access when the kernel owns the security state. */
if (gpu->sec_mode == ETNA_SEC_KERNEL) {
u32 val = gpu_read(gpu, VIVS_MMUv2_AHB_CONTROL);
val |= VIVS_MMUv2_AHB_CONTROL_NONSEC_ACCESS;
gpu_write(gpu, VIVS_MMUv2_AHB_CONTROL, val);
}
/* setup the pulse eater */
etnaviv_gpu_setup_pulse_eater(gpu);
/* Unmask all GPU interrupt sources. */
gpu_write(gpu, VIVS_HI_INTR_ENBL, ~0U);
gpu->state = ETNA_GPU_STATE_INITIALIZED;
}
/*
 * etnaviv_gpu_init() - one-time bring-up of a GPU core: power it up,
 * identify the hardware, reset it, set up the MMU/command buffer and
 * event bookkeeping, then program the hardware state.
 *
 * NOTE(review): the visible text of this function ends at the final
 * mutex_unlock() below without a closing brace, a return statement, or
 * the 'fail'/'pm_put' labels that the gotos target — this chunk is
 * truncated; confirm against the original file.  Some lines have also
 * lost whitespace ("elseif").
 */
int etnaviv_gpu_init(struct etnaviv_gpu *gpu)
{ struct etnaviv_drm_private *priv = gpu->drm->dev_private;
dma_addr_t cmdbuf_paddr; int ret, i;
ret = pm_runtime_get_sync(gpu->dev); if (ret < 0) {
dev_err(gpu->dev, "Failed to enable GPU power domain\n"); goto pm_put;
}
ret = etnaviv_gpu_reset_deassert(gpu); if (ret) {
dev_err(gpu->dev, "GPU reset deassert failed\n"); goto fail;
}
etnaviv_hw_identify(gpu);
if (gpu->identity.model == 0) {
dev_err(gpu->dev, "Unknown GPU model\n");
ret = -ENXIO; goto fail;
}
if (gpu->identity.nn_core_count > 0)
dev_warn(gpu->dev, "etnaviv has been instantiated on a NPU, " "for which the UAPI is still experimental\n");
/* Exclude VG cores with FE2.0 */ if (gpu->identity.features & chipFeatures_PIPE_VG &&
gpu->identity.features & chipFeatures_FE20) {
dev_info(gpu->dev, "Ignoring GPU with VG and FE2.0\n");
ret = -ENXIO; goto fail;
}
/* * On cores with security features supported, we claim control over the * security states.
*/ if ((gpu->identity.minor_features7 & chipMinorFeatures7_BIT_SECURITY) &&
(gpu->identity.minor_features10 & chipMinorFeatures10_SECURITY_AHB))
gpu->sec_mode = ETNA_SEC_KERNEL;
gpu->state = ETNA_GPU_STATE_IDENTIFIED;
ret = etnaviv_hw_reset(gpu); if (ret) {
dev_err(gpu->dev, "GPU reset failed\n"); goto fail;
}
ret = etnaviv_iommu_global_init(gpu); if (ret) goto fail;
/* Create buffer: */
ret = etnaviv_cmdbuf_init(priv->cmdbuf_suballoc, &gpu->buffer, SZ_4K); if (ret) {
dev_err(gpu->dev, "could not create command buffer\n"); goto fail;
}
/* * Set the GPU linear window to cover the cmdbuf region, as the GPU * won't be able to start execution otherwise. The alignment to 128M is * chosen arbitrarily but helps in debugging, as the MMU offset * calculations are much more straight forward this way. * * On MC1.0 cores the linear window offset is ignored by the TS engine, * leading to inconsistent memory views. Avoid using the offset on those * cores if possible, otherwise disable the TS feature. MMUv2 doesn't * expose this issue, as all TS accesses are MMU translated, so the * linear window offset won't be used.
*/
cmdbuf_paddr = ALIGN_DOWN(etnaviv_cmdbuf_get_pa(&gpu->buffer), SZ_128M);
if (!(gpu->identity.features & chipFeatures_PIPE_3D) ||
(gpu->identity.minor_features0 & chipMinorFeatures0_MC20) ||
(gpu->identity.minor_features1 & chipMinorFeatures1_MMU_VERSION)) { if (cmdbuf_paddr >= SZ_2G)
priv->mmu_global->memory_base = SZ_2G; else
priv->mmu_global->memory_base = cmdbuf_paddr;
} elseif (cmdbuf_paddr + SZ_128M >= SZ_2G) {
dev_info(gpu->dev, "Need to move linear window on MC1.0, disabling TS\n");
gpu->identity.features &= ~chipFeatures_FAST_CLEAR;
priv->mmu_global->memory_base = SZ_2G;
}
/* Setup event management */
spin_lock_init(&gpu->event_spinlock);
init_completion(&gpu->event_free);
/* One completion per event slot, so allocators can block until a slot frees up. */
bitmap_zero(gpu->event_bitmap, ETNA_NR_EVENTS); for (i = 0; i < ARRAY_SIZE(gpu->event); i++)
complete(&gpu->event_free);
/* Now program the hardware */
mutex_lock(&gpu->lock);
etnaviv_gpu_hw_init(gpu);
mutex_unlock(&gpu->lock);
/*
 * NOTE(review): this region splices together fragments of (at least)
 * three different routines: a debugfs state dump (uses 'm', 'axi' and
 * 'idle', none declared here), a fence-allocation helper (the
 * lockdep_assert/kzalloc of 'f'), and the tail of an event/runtime-PM
 * acquisition loop ('nr_events', 'rpm_count' and 'acquired' are not
 * declared here).  The enclosing function headers are missing from
 * this chunk — confirm against the original file.
 */
seq_printf(m, "\taxi: 0x%08x\n", axi);
seq_printf(m, "\tidle: 0x%08x\n", idle);
/* Mark bits outside this core's idle mask as idle so they aren't reported below. */
idle |= ~gpu->idle_mask & ~VIVS_HI_IDLE_STATE_AXI_LP; if ((idle & VIVS_HI_IDLE_STATE_FE) == 0)
seq_puts(m, "\t FE is not idle\n"); if ((idle & VIVS_HI_IDLE_STATE_DE) == 0)
seq_puts(m, "\t DE is not idle\n"); if ((idle & VIVS_HI_IDLE_STATE_PE) == 0)
seq_puts(m, "\t PE is not idle\n"); if ((idle & VIVS_HI_IDLE_STATE_SH) == 0)
seq_puts(m, "\t SH is not idle\n"); if ((idle & VIVS_HI_IDLE_STATE_PA) == 0)
seq_puts(m, "\t PA is not idle\n"); if ((idle & VIVS_HI_IDLE_STATE_SE) == 0)
seq_puts(m, "\t SE is not idle\n"); if ((idle & VIVS_HI_IDLE_STATE_RA) == 0)
seq_puts(m, "\t RA is not idle\n"); if ((idle & VIVS_HI_IDLE_STATE_TX) == 0)
seq_puts(m, "\t TX is not idle\n"); if ((idle & VIVS_HI_IDLE_STATE_VG) == 0)
seq_puts(m, "\t VG is not idle\n"); if ((idle & VIVS_HI_IDLE_STATE_IM) == 0)
seq_puts(m, "\t IM is not idle\n"); if ((idle & VIVS_HI_IDLE_STATE_FP) == 0)
seq_puts(m, "\t FP is not idle\n"); if ((idle & VIVS_HI_IDLE_STATE_TS) == 0)
seq_puts(m, "\t TS is not idle\n"); if ((idle & VIVS_HI_IDLE_STATE_BL) == 0)
seq_puts(m, "\t BL is not idle\n"); if ((idle & VIVS_HI_IDLE_STATE_ASYNCFE) == 0)
seq_puts(m, "\t ASYNCFE is not idle\n"); if ((idle & VIVS_HI_IDLE_STATE_MC) == 0)
seq_puts(m, "\t MC is not idle\n"); if ((idle & VIVS_HI_IDLE_STATE_PPA) == 0)
seq_puts(m, "\t PPA is not idle\n"); if ((idle & VIVS_HI_IDLE_STATE_WD) == 0)
seq_puts(m, "\t WD is not idle\n"); if ((idle & VIVS_HI_IDLE_STATE_NN) == 0)
seq_puts(m, "\t NN is not idle\n"); if ((idle & VIVS_HI_IDLE_STATE_TP) == 0)
seq_puts(m, "\t TP is not idle\n"); if (idle & VIVS_HI_IDLE_STATE_AXI_LP)
seq_puts(m, "\t AXI low power mode\n");
/* * GPU lock must already be held, otherwise fence completion order might * not match the seqno order assigned here.
*/
lockdep_assert_held(&gpu->lock);
f = kzalloc(sizeof(*f), GFP_KERNEL); if (!f) return NULL;
/* Take one runtime-PM reference per requested event; unwind on failure. */
for (i = 0; i < nr_events; i++) {
ret = pm_runtime_resume_and_get(gpu->dev); if (ret) goto out_rpm;
rpm_count++;
}
return 0;
out_rpm: for (i = 0; i < rpm_count; i++)
pm_runtime_put_autosuspend(gpu->dev);
out: for (i = 0; i < acquired; i++)
complete(&gpu->event_free);
return ret;
}
staticvoid event_free(struct etnaviv_gpu *gpu, unsignedint event)
{ if (!test_bit(event, gpu->event_bitmap)) {
dev_warn(gpu->dev, "event %u is already marked as free",
event);
} else {
clear_bit(event, gpu->event_bitmap);
complete(&gpu->event_free);
}
pm_runtime_put_autosuspend(gpu->dev);
}
/*
 * Cmdstream submission/retirement:
 */

/*
 * etnaviv_gpu_wait_fence_interruptible() - wait for a user fence.
 * @gpu: GPU owning the fence
 * @id: user fence id to look up in gpu->user_fences
 * @timeout: optional timeout; NULL means just poll for completion
 *
 * Returns 0 when the fence is signalled (or no longer exists),
 * -EBUSY when polling an unsignalled fence without a timeout,
 * -ETIMEDOUT when the wait timed out, or -ERESTARTSYS when the wait
 * was interrupted by a signal.
 */
int etnaviv_gpu_wait_fence_interruptible(struct etnaviv_gpu *gpu,
	u32 id, struct drm_etnaviv_timespec *timeout)
{
	struct dma_fence *fence;
	int ret;

	/*
	 * Look up the fence and take a reference. We might still find a fence
	 * whose refcount has already dropped to zero. dma_fence_get_rcu
	 * pretends we didn't find a fence in that case.
	 */
	rcu_read_lock();
	fence = xa_load(&gpu->user_fences, id);
	if (fence)
		fence = dma_fence_get_rcu(fence);
	rcu_read_unlock();

	if (!fence)
		return 0;

	if (!timeout) {
		/* No timeout was requested: just test for completion */
		ret = dma_fence_is_signaled(fence) ? 0 : -EBUSY;
	} else {
		unsigned long remaining = etnaviv_timeout_to_jiffies(timeout);

		/*
		 * dma_fence_wait_timeout() returns 0 on timeout, a negative
		 * error, or the remaining jiffies on success — map the
		 * success case back to 0 while passing -ERESTARTSYS through.
		 */
		ret = dma_fence_wait_timeout(fence, true, remaining);
		if (ret == 0)
			ret = -ETIMEDOUT;
		else if (ret != -ERESTARTSYS)
			ret = 0;
	}

	dma_fence_put(fence);
	return ret;
}
/* * Wait for an object to become inactive. This, on it's own, is not race * free: the object is moved by the scheduler off the active list, and * then the iova is put. Moreover, the object could be re-submitted just * after we notice that it's become inactive. * * Although the retirement happens under the gpu lock, we don't want to hold * that lock in this function while waiting.
*/ int etnaviv_gpu_wait_obj_inactive(struct etnaviv_gpu *gpu, struct etnaviv_gem_object *etnaviv_obj, struct drm_etnaviv_timespec *timeout)
{ unsignedlong remaining; long ret;
if (!timeout) return !is_active(etnaviv_obj) ? 0 : -EBUSY;
/*
 * NOTE(review): from here on the body no longer matches the function
 * header above — it re-enables module clock gating and writes perfmon
 * sample values ('val', 'submit' and 'i' are not declared in this
 * function), while 'remaining'/'ret' are never used and no value is
 * returned.  This looks like a splice of a perfmon sync-point
 * routine; "unsignedlong"/"conststruct" have also lost whitespace.
 * Confirm against the original file.
 */
/* enable clock gating */
val = gpu_read_power(gpu, VIVS_PM_POWER_CONTROLS);
val |= VIVS_PM_POWER_CONTROLS_ENABLE_MODULE_CLOCK_GATING;
gpu_write_power(gpu, VIVS_PM_POWER_CONTROLS, val);
mutex_unlock(&gpu->lock);
for (i = 0; i < submit->nr_pmrs; i++) { conststruct etnaviv_perfmon_request *pmr = submit->pmrs + i;
*pmr->bo_vma = pmr->sequence;
}
}
/* add bo's to gpu's ring, and kick gpu: */ struct dma_fence *etnaviv_gpu_submit(struct etnaviv_gem_submit *submit)
{ struct etnaviv_gpu *gpu = submit->gpu; struct dma_fence *gpu_fence; unsignedint i, nr_events = 1, event[3]; int ret;
/*
 * NOTE(review): "unsignedint" above has lost its whitespace, and the
 * function text is cut off after the prev_mmu_context update below —
 * the out_unlock label targeted by the goto, the command-stream queueing
 * and the return statement are not visible in this chunk.  Confirm
 * against the original file.
 */
/* * if there are performance monitor requests we need to have * - a sync point to re-configure gpu and process ETNA_PM_PROCESS_PRE * requests. * - a sync point to re-configure gpu, process ETNA_PM_PROCESS_POST requests * and update the sequence number for userspace.
*/ if (submit->nr_pmrs)
nr_events = 3;
ret = event_alloc(gpu, nr_events, event); if (ret) {
DRM_ERROR("no free events\n");
pm_runtime_put_noidle(gpu->dev); return NULL;
}
mutex_lock(&gpu->lock);
/* On fence allocation failure give all acquired event slots back. */
gpu_fence = etnaviv_gpu_fence_alloc(gpu); if (!gpu_fence) { for (i = 0; i < nr_events; i++)
event_free(gpu, event[i]);
goto out_unlock;
}
/* First submit after init: kick off the FE idle loop. */
if (gpu->state == ETNA_GPU_STATE_INITIALIZED)
etnaviv_gpu_start_fe_idleloop(gpu, submit->mmu_context);
if (submit->prev_mmu_context)
etnaviv_iommu_context_put(submit->prev_mmu_context);
submit->prev_mmu_context = etnaviv_iommu_context_get(gpu->mmu_context);
/*
 * NOTE(review): fragment of a GPU recovery/reset path — the enclosing
 * function header and the 'pm_put' label targeted below are not
 * visible in this chunk, and 'i' is not declared here.  Confirm
 * against the original file.
 */
if (pm_runtime_get_sync(gpu->dev) < 0) goto pm_put;
mutex_lock(&gpu->lock);
etnaviv_hw_reset(gpu);
/* complete all events, the GPU won't do it after the reset */
spin_lock(&gpu->event_spinlock);
for_each_set_bit(i, gpu->event_bitmap, ETNA_NR_EVENTS)
event_free(gpu, i);
spin_unlock(&gpu->event_spinlock);
/*
 * NOTE(review): tail of an interrupt handler — it drains the pending
 * event bits in 'intr' (not declared here; the function header and
 * the initial interrupt-status read are missing from this chunk) and
 * signals the fence attached to each completed event.  'now' and
 * 'ret' are likewise declared outside the visible span.
 */
while ((event = ffs(intr)) != 0) { struct dma_fence *fence;
event -= 1;
intr &= ~(1 << event);
dev_dbg(gpu->dev, "event %u\n", event);
/* Sync-point events are handed off to process context via the workqueue. */
if (gpu->event[event].sync_point) {
gpu->sync_point_event = event;
queue_work(gpu->wq, &gpu->sync_point_work);
}
fence = gpu->event[event].fence; if (!fence) continue;
gpu->event[event].fence = NULL;
/* * Events can be processed out of order. Eg, * - allocate and queue event 0 * - allocate event 1 * - event 0 completes, we process it * - allocate and queue event 0 * - event 1 and event 0 complete * we can end up processing event 0 first, then 1.
*/ if (fence_after(fence->seqno, gpu->completed_fence))
gpu->completed_fence = fence->seqno;
dma_fence_signal_timestamp(fence, now);
event_free(gpu, event);
}
ret = IRQ_HANDLED;
}
return ret;
}
/*
 * NOTE(review): the header names etnaviv_gpu_clk_enable() (with
 * "staticint" missing its whitespace), but after the four
 * clk_prepare_enable() calls the body turns into a poll-for-idle loop
 * that belongs to a wait-idle helper: the goto targets
 * (disable_clk_reg/_bus/_core) are never defined in the visible span
 * and 'timeout' is not declared.  Two functions appear spliced
 * together — confirm against the original file.
 */
staticint etnaviv_gpu_clk_enable(struct etnaviv_gpu *gpu)
{ int ret;
ret = clk_prepare_enable(gpu->clk_reg); if (ret) return ret;
ret = clk_prepare_enable(gpu->clk_bus); if (ret) goto disable_clk_reg;
ret = clk_prepare_enable(gpu->clk_core); if (ret) goto disable_clk_bus;
ret = clk_prepare_enable(gpu->clk_shader); if (ret) goto disable_clk_core;
/* Poll the idle register in 5us steps until all masked units report idle. */
do {
u32 idle = gpu_read(gpu, VIVS_HI_IDLE_STATE);
if ((idle & gpu->idle_mask) == gpu->idle_mask) return 0;
if (time_is_before_jiffies(timeout)) {
dev_warn(gpu->dev, "timed out waiting for idle: idle=0x%x\n",
idle); return -ETIMEDOUT;
}
udelay(5);
} while (1);
}
/*
 * etnaviv_gpu_hw_suspend() - quiesce the hardware before suspend.
 * @gpu: GPU to quiesce
 *
 * If the front end is running its idle loop, replace the final WAIT
 * command with END so it stops fetching, give it a short window to
 * drain, and drop the state back to INITIALIZED.  Always invalidates
 * the cached execution state.
 */
static void etnaviv_gpu_hw_suspend(struct etnaviv_gpu *gpu)
{
	if (gpu->state == ETNA_GPU_STATE_RUNNING) {
		/* Replace the last WAIT with END */
		mutex_lock(&gpu->lock);
		etnaviv_buffer_end(gpu);
		mutex_unlock(&gpu->lock);

		/*
		 * We know that only the FE is busy here, this should
		 * happen quickly (as the WAIT is only 200 cycles).  If
		 * we fail, just warn and continue.
		 */
		etnaviv_gpu_wait_idle(gpu, 100);

		gpu->state = ETNA_GPU_STATE_INITIALIZED;
	}

	gpu->exec_state = -1;
}
/*
 * NOTE(review): the header names etnaviv_gpu_hw_resume() (with
 * "staticint" missing its whitespace), but after mutex_lock_killable()
 * the body becomes platform-probe/bind code: it uses 'pdev', 'dev' and
 * 'err', none of which are declared here, and near the bottom calls
 * etnaviv_gpu_hw_resume() itself — unconditional self-recursion as
 * written.  At least two functions appear spliced together and the
 * closing brace of the function is missing from this chunk.  Confirm
 * against the original file.
 */
staticint etnaviv_gpu_hw_resume(struct etnaviv_gpu *gpu)
{ int ret;
ret = mutex_lock_killable(&gpu->lock); if (ret) return ret;
/* Get Reset: */
gpu->rst = devm_reset_control_get_optional_exclusive(&pdev->dev, NULL); if (IS_ERR(gpu->rst)) return dev_err_probe(dev, PTR_ERR(gpu->rst), "failed to get reset\n");
err = reset_control_assert(gpu->rst); if (err) return dev_err_probe(dev, err, "failed to assert reset\n");
/* Get Interrupt: */
gpu->irq = platform_get_irq(pdev, 0); if (gpu->irq < 0) return gpu->irq;
/* TODO: figure out max mapped size */
dev_set_drvdata(dev, gpu);
/* * We treat the device as initially suspended. The runtime PM * autosuspend delay is rather arbitrary: no measurements have * yet been performed to determine an appropriate value.
*/
pm_runtime_use_autosuspend(dev);
pm_runtime_set_autosuspend_delay(dev, 200);
pm_runtime_enable(dev);
err = component_add(dev, &gpu_ops); if (err < 0) {
dev_err(dev, "failed to register component: %d\n", err); return err;
}
ret = etnaviv_gpu_clk_enable(gpu); if (ret) return ret;
/* Re-initialise the basic hardware state */ if (gpu->state == ETNA_GPU_STATE_IDENTIFIED) {
ret = etnaviv_gpu_hw_resume(gpu); if (ret) {
etnaviv_gpu_clk_disable(gpu); return ret;
}
}
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereitgestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.