static u16 compute_eu_total(conststruct sseu_dev_info *sseu)
{ int s, ss, total = 0;
for (s = 0; s < sseu->max_slices; s++) for (ss = 0; ss < sseu->max_subslices; ss++) if (sseu->has_xehp_dss)
total += hweight16(sseu->eu_mask.xehp[ss]); else
total += hweight16(sseu->eu_mask.hsw[s][ss]);
return total;
}
/**
 * intel_sseu_copy_eumask_to_user - Copy EU mask into a userspace buffer
 * @to: Pointer to userspace buffer to copy to
 * @sseu: SSEU structure containing EU mask to copy
 *
 * Copies the EU mask to a userspace buffer in the format expected by
 * the query ioctl's topology queries.
 *
 * Returns the result of the copy_to_user() operation.
 */
int intel_sseu_copy_eumask_to_user(void __user *to,
				   const struct sseu_dev_info *sseu)
{
	u8 eu_mask[GEN_SS_MASK_SIZE * GEN_MAX_EU_STRIDE] = {};
	int eu_stride = GEN_SSEU_STRIDE(sseu->max_eus_per_subslice);
	int len = sseu->max_slices * sseu->max_subslices * eu_stride;
	int s, ss, i;

	for (s = 0; s < sseu->max_slices; s++) {
		for (ss = 0; ss < sseu->max_subslices; ss++) {
			/* Byte offset of this subslice's EU mask in the uAPI layout */
			int uapi_offset =
				s * sseu->max_subslices * eu_stride +
				ss * eu_stride;
			u16 mask = sseu_get_eus(sseu, s, ss);

			/* Serialize little-endian, one byte per stride slot */
			for (i = 0; i < eu_stride; i++)
				eu_mask[uapi_offset + i] =
					(mask >> (BITS_PER_BYTE * i)) & 0xff;
		}
	}

	return copy_to_user(to, eu_mask, len);
}
/** * intel_sseu_copy_ssmask_to_user - Copy subslice mask into a userspace buffer * @to: Pointer to userspace buffer to copy to * @sseu: SSEU structure containing subslice mask to copy * * Copies the subslice mask to a userspace buffer in the format expected by * the query ioctl's topology queries. * * Returns the result of the copy_to_user() operation.
 */
int intel_sseu_copy_ssmask_to_user(void __user *to,
				   const struct sseu_dev_info *sseu)
{
u8 ss_mask[GEN_SS_MASK_SIZE] = {}; int ss_stride = GEN_SSEU_STRIDE(sseu->max_subslices); int len = sseu->max_slices * ss_stride; int s, ss, i;
for (s = 0; s < sseu->max_slices; s++) { for (ss = 0; ss < sseu->max_subslices; ss++) {
i = s * ss_stride * BITS_PER_BYTE + ss;
if (!intel_sseu_has_subslice(sseu, s, ss)) continue;
/* * The concept of slice has been removed in Xe_HP. To be compatible * with prior generations, assume a single slice across the entire * device. Then calculate out the DSS for each workload type within * that software slice.
*/
intel_sseu_set_info(sseu, 1,
32 * max(num_geometry_regs, num_compute_regs),
HAS_ONE_EU_PER_FUSE_BIT(gt->i915) ? 8 : 16);
sseu->has_xehp_dss = 1;
/* * Gen12 has Dual-Subslices, which behave similarly to 2 gen11 SS. * Instead of splitting these, provide userspace with an array * of DSS to more closely represent the hardware resource.
*/
intel_sseu_set_info(sseu, 1, 6, 16);
/* * Although gen12 architecture supported multiple slices, TGL, RKL, * DG1, and ADL only had a single slice.
*/
s_en = REG_FIELD_GET(GEN11_GT_S_ENA_MASK,
intel_uncore_read(uncore, GEN11_GT_SLICE_ENABLE));
	drm_WARN_ON(&gt->i915->drm, s_en != 0x1);
/* * Although gen11 architecture supported multiple slices, ICL and * EHL/JSL only had a single slice in practice.
*/
s_en = REG_FIELD_GET(GEN11_GT_S_ENA_MASK,
intel_uncore_read(uncore, GEN11_GT_SLICE_ENABLE));
	drm_WARN_ON(&gt->i915->drm, s_en != 0x1);
/* * CHV expected to always have a uniform distribution of EU * across subslices.
*/
sseu->eu_per_subslice = intel_sseu_subslice_total(sseu) ?
sseu->eu_total /
intel_sseu_subslice_total(sseu) :
0; /* * CHV supports subslice power gating on devices with more than * one subslice, and supports EU power gating on devices with * more than one EU pair per subslice.
*/
sseu->has_slice_pg = 0;
sseu->has_subslice_pg = intel_sseu_subslice_total(sseu) > 1;
sseu->has_eu_pg = (sseu->eu_per_subslice > 2);
}
/* BXT has a single slice and at most 3 subslices. */
intel_sseu_set_info(sseu, IS_GEN9_LP(i915) ? 1 : 3,
IS_GEN9_LP(i915) ? 3 : 4, 8);
/* * The subslice disable field is global, i.e. it applies * to each of the enabled slices.
*/
subslice_mask = (1 << sseu->max_subslices) - 1;
subslice_mask &= ~REG_FIELD_GET(GEN9_F2_SS_DIS_MASK, fuse2);
/* * Iterate through enabled slices and subslices to * count the total enabled EU.
*/ for (s = 0; s < sseu->max_slices; s++) { if (!(sseu->slice_mask & BIT(s))) /* skip disabled slice */ continue;
sseu->subslice_mask.hsw[s] = subslice_mask;
eu_disable = intel_uncore_read(uncore, GEN9_EU_DISABLE(s)); for (ss = 0; ss < sseu->max_subslices; ss++) { int eu_per_ss;
u8 eu_disabled_mask;
if (!intel_sseu_has_subslice(sseu, s, ss)) /* skip disabled subslice */ continue;
/* * Record which subslice(s) has(have) 7 EUs. we * can tune the hash used to spread work among * subslices if they are unbalanced.
*/ if (eu_per_ss == 7)
sseu->subslice_7eu[s] |= BIT(ss);
}
}
sseu->eu_total = compute_eu_total(sseu);
/* * SKL is expected to always have a uniform distribution * of EU across subslices with the exception that any one * EU in any one subslice may be fused off for die * recovery. BXT is expected to be perfectly uniform in EU * distribution.
*/
sseu->eu_per_subslice =
intel_sseu_subslice_total(sseu) ?
DIV_ROUND_UP(sseu->eu_total, intel_sseu_subslice_total(sseu)) :
0;
/* * SKL+ supports slice power gating on devices with more than * one slice, and supports EU power gating on devices with * more than one EU pair per subslice. BXT+ supports subslice * power gating on devices with more than one subslice, and * supports EU power gating on devices with more than one EU * pair per subslice.
*/
sseu->has_slice_pg =
!IS_GEN9_LP(i915) && hweight8(sseu->slice_mask) > 1;
sseu->has_subslice_pg =
IS_GEN9_LP(i915) && intel_sseu_subslice_total(sseu) > 1;
sseu->has_eu_pg = sseu->eu_per_subslice > 2;
/* * The subslice disable field is global, i.e. it applies * to each of the enabled slices.
*/
subslice_mask = GENMASK(sseu->max_subslices - 1, 0);
subslice_mask &= ~REG_FIELD_GET(GEN8_F2_SS_DIS_MASK, fuse2);
eu_disable0 = intel_uncore_read(uncore, GEN8_EU_DISABLE0);
eu_disable1 = intel_uncore_read(uncore, GEN8_EU_DISABLE1);
eu_disable2 = intel_uncore_read(uncore, GEN8_EU_DISABLE2);
eu_disable[0] =
REG_FIELD_GET(GEN8_EU_DIS0_S0_MASK, eu_disable0);
eu_disable[1] =
REG_FIELD_GET(GEN8_EU_DIS0_S1_MASK, eu_disable0) |
REG_FIELD_GET(GEN8_EU_DIS1_S1_MASK, eu_disable1) << hweight32(GEN8_EU_DIS0_S1_MASK);
eu_disable[2] =
REG_FIELD_GET(GEN8_EU_DIS1_S2_MASK, eu_disable1) |
REG_FIELD_GET(GEN8_EU_DIS2_S2_MASK, eu_disable2) << hweight32(GEN8_EU_DIS1_S2_MASK);
/* * Iterate through enabled slices and subslices to * count the total enabled EU.
*/ for (s = 0; s < sseu->max_slices; s++) { if (!(sseu->slice_mask & BIT(s))) /* skip disabled slice */ continue;
sseu->subslice_mask.hsw[s] = subslice_mask;
for (ss = 0; ss < sseu->max_subslices; ss++) {
u8 eu_disabled_mask;
u32 n_disabled;
if (!intel_sseu_has_subslice(sseu, s, ss)) /* skip disabled subslice */ continue;
/* * Record which subslices have 7 EUs.
*/ if (sseu->max_eus_per_subslice - n_disabled == 7)
sseu->subslice_7eu[s] |= 1 << ss;
}
}
sseu->eu_total = compute_eu_total(sseu);
/* * BDW is expected to always have a uniform distribution of EU across * subslices with the exception that any one EU in any one subslice may * be fused off for die recovery.
*/
sseu->eu_per_subslice =
intel_sseu_subslice_total(sseu) ?
DIV_ROUND_UP(sseu->eu_total, intel_sseu_subslice_total(sseu)) :
0;
/* * BDW supports slice power gating on devices with more than * one slice.
*/
sseu->has_slice_pg = hweight8(sseu->slice_mask) > 1;
sseu->has_subslice_pg = 0;
sseu->has_eu_pg = 0;
}
/* * There isn't a register to tell us how many slices/subslices. We * work off the PCI-ids here.
*/ switch (INTEL_INFO(i915)->gt) { default:
MISSING_CASE(INTEL_INFO(i915)->gt);
fallthrough; case 1:
sseu->slice_mask = BIT(0);
subslice_mask = BIT(0); break; case 2:
sseu->slice_mask = BIT(0);
subslice_mask = BIT(0) | BIT(1); break; case 3:
sseu->slice_mask = BIT(0) | BIT(1);
subslice_mask = BIT(0) | BIT(1); break;
}
/* * No explicit RPCS request is needed to ensure full * slice/subslice/EU enablement prior to Gen9.
*/ if (GRAPHICS_VER(i915) < 9) return 0;
/* * If i915/perf is active, we want a stable powergating configuration * on the system. Use the configuration pinned by i915/perf.
*/ if (gt->perf.group && gt->perf.group[PERF_GROUP_OAG].exclusive_stream)
		req_sseu = &gt->perf.sseu;
/* * Since the SScount bitfield in GEN8_R_PWR_CLK_STATE is only three bits * wide and Icelake has up to eight subslices, specfial programming is * needed in order to correctly enable all subslices. * * According to documentation software must consider the configuration * as 2x4x8 and hardware will translate this to 1x8x8. * * Furthermore, even though SScount is three bits, maximum documented * value for it is four. From this some rules/restrictions follow: * * 1. * If enabled subslice count is greater than four, two whole slices must * be enabled instead. * * 2. * When more than one slice is enabled, hardware ignores the subslice * count altogether. * * From these restrictions it follows that it is not possible to enable * a count of subslices between the SScount maximum of four restriction, * and the maximum available number on a particular SKU. Either all * subslices are enabled, or a count between one and four on the first * slice.
*/ if (GRAPHICS_VER(i915) == 11 &&
slices == 1 &&
subslices > min_t(u8, 4, hweight8(sseu->subslice_mask.hsw[0]) / 2)) {
GEM_BUG_ON(subslices & 1);
subslice_pg = false;
slices *= 2;
}
/* * Starting in Gen9, render power gating can leave * slice/subslice/EU in a partially enabled state. We * must make an explicit request through RPCS for full * enablement.
*/ if (sseu->has_slice_pg) {
u32 mask, val = slices;
if (GRAPHICS_VER(i915) >= 11) {
mask = GEN11_RPCS_S_CNT_MASK;
val <<= GEN11_RPCS_S_CNT_SHIFT;
} else {
mask = GEN8_RPCS_S_CNT_MASK;
val <<= GEN8_RPCS_S_CNT_SHIFT;
}
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereit gestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.