/*
 * Our goal here is simple, we want to greedily fill the object from
 * largest to smallest page-size, while ensuring that we use *every*
 * page-size as per the given page-mask.
*/ do { unsignedint bit = ilog2(page_mask); unsignedint page_size = BIT(bit); int order = get_order(page_size);
/* Use optimal page sized chunks to fill in the sg table */
rem = obj->base.size;
sg = st->sgl;
st->nents = 0; do { unsignedint page_size = get_largest_page_size(i915, rem); unsignedint len = min(page_size * div_u64(rem, page_size),
max_len);
/*
 * The dma-api is like a box of chocolates when it comes to the
 * alignment of dma addresses, however for LMEM we have total control
 * and so can guarantee alignment, likewise when we allocate our blocks
 * they should appear in descending order, and if we know that we align
 * to the largest page size for the GTT address, we should be able to
 * assert that if we see 2M physical pages then we should also get 2M
 * GTT pages. If we don't then something might be wrong in our
 * construction of the backing pages.
 *
 * Maintaining alignment is required to utilise huge pages in the ppGTT.
*/ if (i915_gem_object_is_lmem(obj) &&
IS_ALIGNED(i915_vma_offset(vma), SZ_2M) &&
vma->page_sizes.sg & SZ_2M &&
vma->resource->page_sizes_gtt < SZ_2M) {
pr_err("gtt pages mismatch for LMEM, expected 2M GTT pages, sg(%u), gtt(%u)\n",
vma->page_sizes.sg, vma->resource->page_sizes_gtt);
err = -EINVAL;
}
return err;
}
staticint igt_mock_exhaust_device_supported_pages(void *arg)
{ struct i915_ppgtt *ppgtt = arg; struct drm_i915_private *i915 = ppgtt->vm.i915; unsignedint saved_mask = RUNTIME_INFO(i915)->page_sizes; struct drm_i915_gem_object *obj; struct i915_vma *vma; int i, j, single; int err;
/*
 * Sanity check creating objects with every valid page support
 * combination for our mock device.
*/
for (i = 1; i < BIT(ARRAY_SIZE(page_sizes)); i++) { unsignedint combination = SZ_4K; /* Required for ppGTT */
for (j = 0; j < ARRAY_SIZE(page_sizes); j++) { if (i & BIT(j))
combination |= page_sizes[j];
}
RUNTIME_INFO(i915)->page_sizes = combination;
for (single = 0; single <= 1; ++single) {
obj = fake_huge_pages_object(i915, combination, !!single); if (IS_ERR(obj)) {
err = PTR_ERR(obj); goto out_device;
}
/* * Sanity check dma misalignment for huge pages -- the dma addresses we * insert into the paging structures need to always respect the page * size alignment.
*/
/* * Try all the other valid offsets until the next * boundary -- should always fall back to using 4K * pages.
*/ for (offset = 4096; offset < page_size; offset += 4096) {
err = i915_vma_unbind_unlocked(vma); if (err) goto out_unpin;
/* vma start must be aligned to BIT(21) to allow 2M PTEs */
err = i915_vma_pin(vma, 0, BIT(21), PIN_USER); if (err) break;
err = igt_check_page_sizes(vma); if (err) {
i915_vma_unpin(vma); break;
}
/* * Figure out the expected gtt page size knowing that we go from * largest to smallest page size sg chunks, and that we align to * the largest page size.
*/ for (i = 0; i < ARRAY_SIZE(page_sizes); ++i) { unsignedint page_size = page_sizes[i];
/* * Sanity check some of the trickiness with 64K pages -- either we can * safely mark the whole page-table(2M block) as 64K, or we have to * always fallback to 4K.
*/
if (!HAS_PAGE_SIZES(i915, I915_GTT_PAGE_SIZE_64K)) return 0;
file = mock_file(i915); if (IS_ERR(file)) return PTR_ERR(file);
ctx = hugepage_ctx(i915, file); if (IS_ERR(ctx)) {
err = PTR_ERR(ctx); goto out;
}
vm = i915_gem_context_get_eb_vm(ctx);
for (i = 0; i < ARRAY_SIZE(objects); ++i) { unsignedint size = objects[i].size; unsignedint expected_gtt = objects[i].gtt; unsignedint offset = objects[i].offset; unsignedint flags = PIN_USER;
/* * For modern GTT models, the requirements for marking a page-table * as 64K have been relaxed. Account for this.
*/ if (has_pte64) {
expected_gtt = 0; if (size >= SZ_64K)
expected_gtt |= I915_GTT_PAGE_SIZE_64K; if (size & (SZ_64K - 1))
expected_gtt |= I915_GTT_PAGE_SIZE_4K;
}
for (single = 0; single <= 1; single++) {
obj = fake_huge_pages_object(i915, size, !!single); if (IS_ERR(obj)) {
err = PTR_ERR(obj); goto out_vm;
}
err = i915_gem_object_pin_pages_unlocked(obj); if (err) goto out_object_put;
/* * Disable 2M pages -- We only want to use 64K/4K pages * for this test.
*/
obj->mm.page_sizes.sg &= ~I915_GTT_PAGE_SIZE_2M;
vma = i915_vma_instance(obj, ce->vm, NULL); if (IS_ERR(vma)) return PTR_ERR(vma);
err = i915_vma_pin(vma, size, 0, flags | offset); if (err) { /* * The ggtt may have some pages reserved so * refrain from erroring out.
*/ if (err == -ENOSPC && i915_is_ggtt(ce->vm))
err = 0;
return err;
}
err = igt_check_page_sizes(vma); if (err) goto out_vma_unpin;
err = gpu_write(ce, vma, dword, val); if (err) {
pr_err("gpu-write failed at offset=%llx\n", offset); goto out_vma_unpin;
}
err = cpu_check(obj, dword, val); if (err) {
pr_err("cpu-check failed at offset=%llx\n", offset); goto out_vma_unpin;
}
n = 0;
count = 0;
max = U64_MAX;
for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) {
count++; if (!intel_engine_can_store_dword(ce->engine)) continue;
max = min(max, ce->vm->total);
n++;
}
i915_gem_context_unlock_engines(ctx); if (!n) goto out;
/* * To keep things interesting when alternating between engines in our * randomized order, lets also make feeding to the same engine a few * times in succession a possibility by enlarging the permutation array.
*/
order = i915_random_order(count * count, &prng); if (!order) {
err = -ENOMEM; goto out;
}
max_page_size = rounddown_pow_of_two(obj->mm.page_sizes.sg);
max = div_u64(max - size, max_page_size);
/* * Try various offsets in an ascending/descending fashion until we * timeout -- we want to avoid issues hidden by effectively always using * offset = 0.
*/
i = 0;
engines = i915_gem_context_lock_engines(ctx);
for_each_prime_number_from(num, 0, max) {
u64 offset_low = num * max_page_size;
u64 offset_high = (max - num) * max_page_size;
u32 dword = offset_in_page(num) / 4; struct intel_context *ce;
ce = engines->engines[order[i] % engines->num_engines];
i = (i + 1) % (count * count); if (!ce || !intel_engine_can_store_dword(ce->engine)) continue;
/*
 * In order to utilize 64K pages we need to both pad the vma
 * size and ensure the vma offset is at the start of the pt
 * boundary, however to improve coverage we opt for testing both
 * aligned and unaligned offsets.
 *
 * With PS64 this is no longer the case, but to ensure we
 * sometimes get the compact layout for smaller objects, apply
 * the round_up anyway.
*/ if (obj->mm.page_sizes.sg & I915_GTT_PAGE_SIZE_64K)
offset_low = round_down(offset_low,
I915_GTT_PAGE_SIZE_2M);
err = __igt_write_huge(ce, obj, size, offset_low,
dword, num + 1); if (err) break;
err = __igt_write_huge(ce, obj, size, offset_high,
dword, num + 1); if (err) break;
if (igt_timeout(end_time, "%s timed out on %s, offset_low=%llx offset_high=%llx, max_page_size=%x\n",
__func__, ce->engine->name, offset_low, offset_high,
max_page_size)) break;
}
i915_gem_context_unlock_engines(ctx);
/* * Sanity check that the HW behaves with a limited set of combinations. * We already have a bunch of randomised testing, which should give us * a decent amount of variation between runs, however we should keep * this to limit the chances of introducing a temporary regression, by * testing the most obvious cases that might make something blow up.
*/
for (i = 0; i < ARRAY_SIZE(backends); ++i) { for (j = 0; j < ARRAY_SIZE(combos); ++j) { struct drm_i915_gem_object *obj;
u32 size = combos[j].size;
u32 pages = combos[j].pages;
obj = backends[i].fn(i915, size, backends[i].flags); if (IS_ERR(obj)) {
err = PTR_ERR(obj); if (err == -ENODEV) {
pr_info("Device lacks local memory, skipping\n");
err = 0; break;
}
return err;
}
err = i915_gem_object_pin_pages_unlocked(obj); if (err) {
i915_gem_object_put(obj); goto out;
}
/* * Simple test to catch issues with compact 64K pages -- since the pt is * compacted to 256B that gives us 32 entries per pt, however since the * backing page for the pt is 4K, any extra entries we might incorrectly * write out should be ignored by the HW. If ever hit such a case this * test should catch it since some of our writes would land in scratch.
*/
/* We want the range to cover multiple page-table boundaries. */
obj = i915_gem_object_create_lmem(i915, SZ_4M, 0); if (IS_ERR(obj)) return PTR_ERR(obj);
err = i915_gem_object_pin_pages_unlocked(obj); if (err) goto out_put;
if (obj->mm.page_sizes.phys < I915_GTT_PAGE_SIZE_64K) {
pr_info("LMEM compact unable to allocate huge-page(s)\n"); goto out_unpin;
}
/* * Disable 2M GTT pages by forcing the page-size to 64K for the GTT * insertion.
*/
obj->mm.page_sizes.sg = I915_GTT_PAGE_SIZE_64K;
err = igt_write_huge(i915, obj); if (err)
pr_err("LMEM compact write-huge failed\n");
file = mock_file(i915); if (IS_ERR(file)) return PTR_ERR(file);
ctx = hugepage_ctx(i915, file); if (IS_ERR(ctx)) {
err = PTR_ERR(ctx); goto out;
}
vm = i915_gem_context_get_eb_vm(ctx);
/* * Make sure that we don't burst into a ball of flames upon falling back * to tmpfs, which we rely on if on the off-chance we encounter a failure * when setting up gemfs.
*/
wf = intel_runtime_pm_get(&i915->runtime_pm); /* active shrink */
err = i915_vma_pin(vma, 0, 0, flags); if (err) goto out_wf;
if (obj->mm.page_sizes.phys < I915_GTT_PAGE_SIZE_2M) {
pr_info("failed to allocate THP, finishing test early\n"); goto out_unpin;
}
err = igt_check_page_sizes(vma); if (err) goto out_unpin;
n = 0;
for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) { if (!intel_engine_can_store_dword(ce->engine)) continue;
err = gpu_write(ce, vma, n++, 0xdeadbeaf); if (err) break;
}
i915_gem_context_unlock_engines(ctx); /* * Nuke everything *before* we unpin the pages so we can be reasonably * sure that when later checking get_nr_swap_pages() that some random * leftover object doesn't steal the remaining swap space.
*/
i915_gem_shrink(NULL, i915, -1UL, NULL,
I915_SHRINK_BOUND |
I915_SHRINK_UNBOUND |
I915_SHRINK_ACTIVE);
i915_vma_unpin(vma); if (err) goto out_wf;
/* * Now that the pages are *unpinned* shrinking should invoke * shmem to truncate our pages, if we have available swap.
*/
should_swap = get_nr_swap_pages() > 0;
i915_gem_shrink(NULL, i915, -1UL, NULL,
I915_SHRINK_BOUND |
I915_SHRINK_UNBOUND |
I915_SHRINK_ACTIVE |
I915_SHRINK_WRITEBACK); if (should_swap == i915_gem_object_has_pages(obj)) {
pr_err("unexpected pages mismatch, should_swap=%s\n",
str_yes_no(should_swap));
err = -EINVAL; goto out_wf;
}
if (!i915_vm_is_4lvl(&ppgtt->vm)) {
pr_err("failed to create 48b PPGTT\n");
err = -EINVAL; goto out_put;
}
/* If we ever hit this then it's time to mock the 64K scratch */ if (!i915_vm_has_scratch_64K(&ppgtt->vm)) {
pr_err("PPGTT missing 64K scratch page\n");
err = -EINVAL; goto out_put;
}
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit noch Richtigkeit
noch Qualität der bereitgestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.