/** * DOC: RC6 * * RC6 is a special power stage which allows the GPU to enter a very * low-voltage mode when idle, using down to 0V while at this stage. This * stage is entered automatically when the GPU is idle when RC6 support is * enabled, and as soon as a new workload arises the GPU wakes up automatically * as well. * * There are different RC6 modes available in Intel GPUs, which differ from * each other in the latency required to enter and leave RC6 and the * voltage consumed by the GPU in different states. * * The combination of the following flags defines which states the GPU is * allowed to enter, while RC6 is the normal RC6 state, RC6p is the deep RC6, and * RC6pp is deepest RC6. Their support by hardware varies according to the * GPU, BIOS, chipset and platform. RC6 is usually the safest one and the one * which brings the most power savings; deeper states save more power, but * require higher latency to switch to and wake up.
*/
/* * With GuCRC, these parameters are set by GuC
*/ if (!intel_uc_uses_guc_rc(>->uc)) { /* 2b: Program RC6 thresholds.*/
intel_uncore_write_fw(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 54 << 16 | 85);
intel_uncore_write_fw(uncore, GEN10_MEDIA_WAKE_RATE_LIMIT, 150);
intel_uncore_write_fw(uncore, GEN6_RC6_THRESHOLD, 50000); /* 50/125ms per EI */
}
/* * 2c: Program Coarse Power Gating Policies. * * Bspec's guidance is to use 25us (really 25 * 1280ns) here. What we * use instead is a more conservative estimate for the maximum time * it takes us to service a CS interrupt and submit a new ELSP - that * is the time which the GPU is idle waiting for the CPU to select the * next request to execute. If the idle hysteresis is less than that * interrupt service latency, the hardware will automatically gate * the power well and we will then incur the wake up cost on top of * the service latency. A similar guide from plane_state is that we * do not want the enable hysteresis to be less than the wakeup latency. * * igt/gem_exec_nop/sequential provides a rough estimate for the * service latency, and puts it under 10us for Icelake, similar to * Broadwell+. To be conservative, we want to factor in a context * switch on top (due to ksoftirqd).
*/
intel_uncore_write_fw(uncore, GEN9_MEDIA_PG_IDLE_HYSTERESIS, 60);
intel_uncore_write_fw(uncore, GEN9_RENDER_PG_IDLE_HYSTERESIS, 60);
/* 3a: Enable RC6 * * With GuCRC, we do not enable bit 31 of RC_CTL, * thus allowing GuC to control RC6 entry/exit fully instead. * We will not set the HW ENABLE and EI bits
*/ if (!intel_guc_rc_enable(gt_to_guc(gt)))
rc6->ctl_enable = GEN6_RC_CTL_RC6_ENABLE; else
rc6->ctl_enable =
GEN6_RC_CTL_HW_ENABLE |
GEN6_RC_CTL_RC6_ENABLE |
GEN6_RC_CTL_EI_MODE(1);
if (GRAPHICS_VER(gt->i915) >= 12 && !IS_DG1(gt->i915)) { for (i = 0; i < I915_MAX_VCS; i++) if (HAS_ENGINE(gt, _VCS(i)))
pg_enable |= (VDN_HCP_POWERGATE_ENABLE(i) |
VDN_MFX_POWERGATE_ENABLE(i));
}
/* * 2c: Program Coarse Power Gating Policies. * * Bspec's guidance is to use 25us (really 25 * 1280ns) here. What we * use instead is a more conservative estimate for the maximum time * it takes us to service a CS interrupt and submit a new ELSP - that * is the time which the GPU is idle waiting for the CPU to select the * next request to execute. If the idle hysteresis is less than that * interrupt service latency, the hardware will automatically gate * the power well and we will then incur the wake up cost on top of * the service latency. A similar guide from plane_state is that we * do not want the enable hysteresis to be less than the wakeup latency. * * igt/gem_exec_nop/sequential provides a rough estimate for the * service latency, and puts it around 10us for Broadwell (and other * big core) and around 40us for Broxton (and other low power cores). * [Note that for legacy ringbuffer submission, this is less than 1us!] * However, the wakeup latency on Broxton is closer to 100us. To be * conservative, we have to factor in a context switch on top (due * to ksoftirqd).
*/
intel_uncore_write_fw(uncore, GEN9_MEDIA_PG_IDLE_HYSTERESIS, 250);
intel_uncore_write_fw(uncore, GEN9_RENDER_PG_IDLE_HYSTERESIS, 250);
/* 3a: Enable RC6 */
intel_uncore_write_fw(uncore, GEN6_RC6_THRESHOLD, 37500); /* 37.5/125ms per EI */
/* * WaRsDisableCoarsePowerGating:skl,cnl * - Render/Media PG need to be disabled with RC6.
*/ if (!NEEDS_WaRsDisableCoarsePowerGating(rc6_to_i915(rc6)))
intel_uncore_write_fw(uncore, GEN9_PG_ENABLE,
GEN9_RENDER_PG_ENABLE | GEN9_MEDIA_PG_ENABLE);
}
/* We don't use those on Haswell */
rc6_mask = GEN6_RC_CTL_RC6_ENABLE; if (HAS_RC6p(i915))
rc6_mask |= GEN6_RC_CTL_RC6p_ENABLE; if (HAS_RC6pp(i915))
rc6_mask |= GEN6_RC_CTL_RC6pp_ENABLE;
rc6->ctl_enable =
rc6_mask |
GEN6_RC_CTL_EI_MODE(1) |
GEN6_RC_CTL_HW_ENABLE;
rc6vids = 0;
ret = snb_pcode_read(rc6_to_gt(rc6)->uncore, GEN6_PCODE_READ_RC6VIDS, &rc6vids, NULL); if (GRAPHICS_VER(i915) == 6 && ret) {
drm_dbg(&i915->drm, "Couldn't check for BIOS workaround\n");
} elseif (GRAPHICS_VER(i915) == 6 &&
(GEN6_DECODE_RC6_VID(rc6vids & 0xff) < 450)) {
drm_dbg(&i915->drm, "You should update your BIOS. Correcting minimum rc6 voltage (%dmV->%dmV)\n",
GEN6_DECODE_RC6_VID(rc6vids & 0xff), 450);
rc6vids &= 0xffff00;
rc6vids |= GEN6_ENCODE_RC6_VID(450);
ret = snb_pcode_write(rc6_to_gt(rc6)->uncore, GEN6_PCODE_WRITE_RC6VIDS, rc6vids); if (ret)
drm_err(&i915->drm, "Couldn't fix incorrect rc6 voltage\n");
}
}
/* Check that the pcbr address is not empty. */ staticint chv_rc6_init(struct intel_rc6 *rc6)
{ struct intel_uncore *uncore = rc6_to_uncore(rc6); struct drm_i915_private *i915 = rc6_to_i915(rc6);
resource_size_t pctx_paddr, paddr;
resource_size_t pctx_size = 32 * SZ_1K;
u32 pcbr;
pcbr = intel_uncore_read(uncore, VLV_PCBR); if ((pcbr >> VLV_PCBR_ADDR_SHIFT) == 0) {
drm_dbg(&i915->drm, "BIOS didn't set up PCBR, fixing up\n");
paddr = i915->dsm.stolen.end + 1 - pctx_size;
GEM_BUG_ON(paddr > U32_MAX);
drm_dbg(&i915->drm, "BIOS didn't set up PCBR, fixing up\n");
/* * From the Gunit register HAS: * The Gfx driver is expected to program this register and ensure * proper allocation within Gfx stolen memory. For example, this * register should be programmed such that the PCBR range does not * overlap with other ranges, such as the frame buffer, protected * memory, or any other relevant ranges.
*/
pctx = i915_gem_object_create_stolen(i915, pctx_size); if (IS_ERR(pctx)) {
drm_dbg(&i915->drm, "not enough stolen space for PCTX, disabling\n"); return PTR_ERR(pctx);
}
if (!(intel_uncore_read(uncore, RC6_LOCATION) & RC6_CTX_IN_DRAM)) {
drm_dbg(&i915->drm, "RC6 Base location not set properly.\n");
enable_rc6 = false;
}
/* * The exact context size is not known for BXT, so assume a page size * for this check.
*/
rc6_ctx_base =
intel_uncore_read(uncore, RC6_CTX_BASE) & RC6_CTX_BASE_MASK; if (!(rc6_ctx_base >= i915->dsm.reserved.start &&
rc6_ctx_base + PAGE_SIZE < i915->dsm.reserved.end)) {
drm_dbg(&i915->drm, "RC6 Base address not as expected.\n");
enable_rc6 = false;
}
/* We want the BIOS C6 state preserved across loads for MTL */ if (IS_METEORLAKE(rc6_to_i915(rc6)) && rc6->bios_state_captured)
intel_uncore_write_fw(uncore, GEN6_RC_STATE, rc6->bios_rc_state);
pctx = fetch_and_zero(&rc6->pctx); if (pctx)
i915_gem_object_put(pctx);
/* * The registers accessed do not need forcewake. We borrow * uncore lock to prevent concurrent access to range reg.
*/
lockdep_assert_held(&uncore->lock);
/* * vlv and chv residency counters are 40 bits in width. * With a control bit, we can choose between upper or lower * 32bit window into this counter. * * Although we always use the counter in high-range mode elsewhere, * userspace may attempt to read the value before rc6 is initialised, * before we have set the default VLV_COUNTER_CONTROL value. So always * set the high bit to be safe.
*/
intel_uncore_write_fw(uncore, VLV_COUNTER_CONTROL,
_MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH));
upper = intel_uncore_read_fw(uncore, reg); do {
tmp = upper;
/* On VLV and CHV, residency time is in CZ units rather than 1.28us */ if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) {
mul = 1000000;
div = i915->czclk_freq;
overflow_hw = BIT_ULL(40);
time_hw = vlv_residency_raw(uncore, reg);
} else { /* 833.33ns units on Gen9LP, 1.28us elsewhere. */ if (IS_GEN9_LP(i915)) {
mul = 10000;
div = 12;
} else {
mul = 1280;
div = 1;
}
/* * Counter wrap handling. * * Store previous hw counter values for counter wrap-around handling. This * relies on a sufficient frequency of queries; otherwise counters can still wrap.
*/
prev_hw = rc6->prev_hw_residency[id];
rc6->prev_hw_residency[id] = time_hw;
/* RC6 delta from last sample. */ if (time_hw >= prev_hw)
time_hw -= prev_hw; else
time_hw += overflow_hw - prev_hw;
/* Add delta to RC6 extended raw driver copy. */
time_hw += rc6->cur_residency[id];
rc6->cur_residency[id] = time_hw;
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereit gestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.