/* * As some machines use ACPI to handle runtime-resume callbacks, and * ACPI is quite kmalloc happy, we cannot resume beneath the vm->mutex * as they are required by the shrinker. Ergo, we wake the device up * first just in case.
*/
wakeref = intel_runtime_pm_get(rpm);
try_again:
ret = 0;
spin_lock(&obj->vma.lock); while (!ret && (vma = list_first_entry_or_null(&obj->vma.list, struct i915_vma,
obj_link))) {
list_move_tail(&vma->obj_link, &still_in_list); if (!i915_vma_is_bound(vma, I915_VMA_BIND_MASK)) continue;
if (flags & I915_GEM_OBJECT_UNBIND_TEST) {
ret = -EBUSY; break;
}
/* * Requiring the vm destructor to take the object lock * before destroying a vma would help us eliminate the * i915_vm_tryget() here, AND thus also the barrier stuff * at the end. That's an easy fix, but sleeping locks in * a kthread should generally be avoided.
*/
ret = -EAGAIN; if (!i915_vm_tryget(vma->vm)) break;
spin_unlock(&obj->vma.lock);
/* * Since i915_vma_parked() takes the object lock * before vma destruction, it won't race us here, * and destroy the vma from under us.
*/
ret = -EBUSY; if (flags & I915_GEM_OBJECT_UNBIND_ASYNC) {
assert_object_held(vma->obj);
ret = i915_vma_unbind_async(vma, vm_trylock);
}
if (ret == -EBUSY && (flags & I915_GEM_OBJECT_UNBIND_ACTIVE ||
!i915_vma_is_active(vma))) { if (vm_trylock) { if (mutex_trylock(&vma->vm->mutex)) {
ret = __i915_vma_unbind(vma);
mutex_unlock(&vma->vm->mutex);
}
} else {
ret = i915_vma_unbind(vma);
}
}
staticinlinebool
gtt_user_read(struct io_mapping *mapping,
loff_t base, int offset, char __user *user_data, int length)
{ void __iomem *vaddr; unsignedlong unwritten;
/* We can use the cpu mem copy function because this is X86. */
vaddr = io_mapping_map_atomic_wc(mapping, base);
unwritten = __copy_to_user_inatomic(user_data,
(void __force *)vaddr + offset,
length);
io_mapping_unmap_atomic(vaddr); if (unwritten) {
vaddr = io_mapping_map_wc(mapping, base, PAGE_SIZE);
unwritten = copy_to_user(user_data,
(void __force *)vaddr + offset,
length);
io_mapping_unmap(vaddr);
} return unwritten;
}
/**
 * i915_gem_pread_ioctl - Reads data from the object referenced by handle.
 * @dev: drm device pointer
 * @data: ioctl data blob
 * @file: drm file pointer
 *
 * On error, the contents of *data are undefined.
 */
int
i915_gem_pread_ioctl(struct drm_device *dev, void *data,
		     struct drm_file *file)
{
	struct drm_i915_private *i915 = to_i915(dev);
	struct drm_i915_gem_pread *args = data;
	struct drm_i915_gem_object *obj;
	int ret;

	/*
	 * PREAD is disallowed for all platforms after TGL-LP. This also
	 * covers all platforms with local memory.
	 */
	if (GRAPHICS_VER(i915) >= 12 && !IS_TIGERLAKE(i915))
		return -EOPNOTSUPP;

	/* Zero-length reads trivially succeed. */
	if (args->size == 0)
		return 0;

	if (!access_ok(u64_to_user_ptr(args->data_ptr), args->size))
		return -EFAULT;

	obj = i915_gem_object_lookup(file, args->handle);
	if (!obj)
		return -ENOENT;

	/* Bounds check source. */
	if (range_overflows_t(u64, args->offset, args->size, obj->base.size)) {
		ret = -EINVAL;
		goto out;
	}

	trace_i915_gem_object_pread(obj, args->offset, args->size);

	/* Give the backend-specific pread hook first refusal. */
	ret = -ENODEV;
	if (obj->ops->pread)
		ret = obj->ops->pread(obj, args);
	if (ret != -ENODEV)
		goto out;

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE,
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
		goto out;

	/* Try the shmem path, falling back to the GGTT path on a fault. */
	ret = i915_gem_shmem_pread(obj, args);
	if (ret == -EFAULT || ret == -ENODEV)
		ret = i915_gem_gtt_pread(obj, args);

out:
	i915_gem_object_put(obj);
	return ret;
}
/* This is the fast write path which cannot handle * page faults in the source data
*/
staticinlinebool
ggtt_write(struct io_mapping *mapping,
loff_t base, int offset, char __user *user_data, int length)
{ void __iomem *vaddr; unsignedlong unwritten;
/* We can use the cpu mem copy function because this is X86. */
vaddr = io_mapping_map_atomic_wc(mapping, base);
unwritten = __copy_from_user_inatomic_nocache((void __force *)vaddr + offset,
user_data, length);
io_mapping_unmap_atomic(vaddr); if (unwritten) {
vaddr = io_mapping_map_wc(mapping, base, PAGE_SIZE);
unwritten = copy_from_user((void __force *)vaddr + offset,
user_data, length);
io_mapping_unmap(vaddr);
}
return unwritten;
}
/** * i915_gem_gtt_pwrite_fast - This is the fast pwrite path, where we copy the data directly from the * user into the GTT, uncached. * @obj: i915 GEM object * @args: pwrite arguments structure
*/ staticint
i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj, conststruct drm_i915_gem_pwrite *args)
{ struct drm_i915_private *i915 = to_i915(obj->base.dev); struct i915_ggtt *ggtt = to_gt(i915)->ggtt; struct intel_runtime_pm *rpm = &i915->runtime_pm; unsignedlong remain, offset;
intel_wakeref_t wakeref; struct drm_mm_node node; struct i915_vma *vma; void __user *user_data; int ret = 0;
if (overflows_type(args->size, remain) ||
overflows_type(args->offset, offset)) return -EINVAL;
if (i915_gem_object_has_struct_page(obj)) { /* * Avoid waking the device up if we can fallback, as * waking/resuming is very slow (worst-case 10-100 ms * depending on PCI sleeps and our own resume time). * This easily dwarfs any performance advantage from * using the cache bypass of indirect GGTT access.
*/
wakeref = intel_runtime_pm_get_if_in_use(rpm); if (!wakeref) return -EFAULT;
} else { /* No backing pages, no fallback, we must force GGTT access */
wakeref = intel_runtime_pm_get(rpm);
}
vma = i915_gem_gtt_prepare(obj, &node, true); if (IS_ERR(vma)) {
ret = PTR_ERR(vma); goto out_rpm;
}
user_data = u64_to_user_ptr(args->data_ptr);
offset = args->offset;
remain = args->size; while (remain) { /* Operation in this page * * page_base = page offset within aperture * page_offset = offset within page * page_length = bytes to copy for this page
*/
u32 page_base = node.start; unsignedint page_offset = offset_in_page(offset); unsignedint page_length = PAGE_SIZE - page_offset;
page_length = remain < page_length ? remain : page_length; if (drm_mm_node_allocated(&node)) { /* flush the write before we modify the GGTT */
intel_gt_flush_ggtt_writes(ggtt->vm.gt);
ggtt->vm.insert_page(&ggtt->vm,
i915_gem_object_get_dma_address(obj,
offset >> PAGE_SHIFT),
node.start,
i915_gem_get_pat_index(i915,
I915_CACHE_NONE), 0);
wmb(); /* flush modifications to the GGTT (insert_page) */
} else {
page_base += offset & PAGE_MASK;
} /* If we get a fault while copying data, then (presumably) our * source page isn't available. Return the error and we'll * retry in the slow path. * If the object is non-shmem backed, we retry again with the * path that handles page fault.
*/ if (ggtt_write(&ggtt->iomap, page_base, page_offset,
user_data, page_length)) {
ret = -EFAULT; break;
}
/* Per-page copy function for the shmem pwrite fastpath. * Flushes invalid cachelines before writing to the target if * needs_clflush_before is set and flushes out any written cachelines after * writing if needs_clflush is set.
*/ staticint
shmem_pwrite(struct page *page, int offset, int len, char __user *user_data, bool needs_clflush_before, bool needs_clflush_after)
{ char *vaddr; int ret;
vaddr = kmap(page);
if (needs_clflush_before)
drm_clflush_virt_range(vaddr + offset, len);
ret = __copy_from_user(vaddr + offset, user_data, len); if (!ret && needs_clflush_after)
drm_clflush_virt_range(vaddr + offset, len);
/* If we don't overwrite a cacheline completely we need to be * careful to have up-to-date data by first clflushing. Don't * overcomplicate things and flush the entire patch.
*/
partial_cacheline_write = 0; if (needs_clflush & CLFLUSH_BEFORE)
partial_cacheline_write = boot_cpu_data.x86_clflush_size - 1;
/**
 * i915_gem_pwrite_ioctl - Writes data to the object referenced by handle.
 * @dev: drm device
 * @data: ioctl data blob
 * @file: drm file
 *
 * On error, the contents of the buffer that were to be modified are undefined.
 */
int
i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
		      struct drm_file *file)
{
	struct drm_i915_private *i915 = to_i915(dev);
	struct drm_i915_gem_pwrite *args = data;
	struct drm_i915_gem_object *obj;
	int ret;

	/*
	 * PWRITE is disallowed for all platforms after TGL-LP. This also
	 * covers all platforms with local memory.
	 */
	if (GRAPHICS_VER(i915) >= 12 && !IS_TIGERLAKE(i915))
		return -EOPNOTSUPP;

	/* Zero-length writes trivially succeed. */
	if (args->size == 0)
		return 0;

	if (!access_ok(u64_to_user_ptr(args->data_ptr), args->size))
		return -EFAULT;

	obj = i915_gem_object_lookup(file, args->handle);
	if (!obj)
		return -ENOENT;

	/* Bounds check destination. */
	if (range_overflows_t(u64, args->offset, args->size, obj->base.size)) {
		ret = -EINVAL;
		goto err;
	}

	/* Writes not allowed into this read-only object */
	if (i915_gem_object_is_readonly(obj)) {
		ret = -EINVAL;
		goto err;
	}

	/* Give the backend-specific pwrite hook first refusal. */
	ret = -ENODEV;
	if (obj->ops->pwrite)
		ret = obj->ops->pwrite(obj, args);
	if (ret != -ENODEV)
		goto err;

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   I915_WAIT_ALL,
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
		goto err;

	ret = -EFAULT;
	/*
	 * We can only do the GTT pwrite on untiled buffers, as otherwise
	 * it would end up going through the fenced access, and we'll get
	 * different detiling behavior between reading and writing.
	 * pread/pwrite currently are reading and writing from the CPU
	 * perspective, requiring manual detiling by the client.
	 */
	if (!i915_gem_object_has_struct_page(obj) ||
	    i915_gem_cpu_write_needs_clflush(obj))
		/*
		 * Note that the gtt paths might fail with non-page-backed user
		 * pointers (e.g. gtt mappings when moving data between
		 * textures). Fallback to the shmem path in that case.
		 */
		ret = i915_gem_gtt_pwrite_fast(obj, args);

	if (ret == -EFAULT || ret == -ENOSPC) {
		if (i915_gem_object_has_struct_page(obj))
			ret = i915_gem_shmem_pwrite(obj, args);
	}

err:
	i915_gem_object_put(obj);
	return ret;
}
/** * i915_gem_sw_finish_ioctl - Called when user space has done writes to this buffer * @dev: drm device * @data: ioctl data blob * @file: drm file
*/ int
i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
{ struct drm_i915_gem_sw_finish *args = data; struct drm_i915_gem_object *obj;
obj = i915_gem_object_lookup(file, args->handle); if (!obj) return -ENOENT;
/* * Proxy objects are barred from CPU access, so there is no * need to ban sw_finish as it is a nop.
*/
/* Pinned buffers may be scanout, so flush the cache */
i915_gem_object_flush_if_display(obj);
i915_gem_object_put(obj);
/* * Only called during RPM suspend. All users of the userfault_list * must be holding an RPM wakeref to ensure that this can not * run concurrently with themselves (and use the struct_mutex for * protection between themselves).
*/
list_for_each_entry_safe(obj, on,
&to_gt(i915)->ggtt->userfault_list, userfault_link)
__i915_gem_object_release_mmap_gtt(obj);
list_for_each_entry_safe(obj, on,
&i915->runtime_pm.lmem_userfault_list, userfault_link)
i915_gem_object_runtime_pm_release_mmap_offset(obj);
/* * The fence will be lost when the device powers down. If any were * in use by hardware (i.e. they are pinned), we should not be powering * down! All other fences will be reacquired by the user upon waking.
*/ for (i = 0; i < to_gt(i915)->ggtt->num_fences; i++) { struct i915_fence_reg *reg = &to_gt(i915)->ggtt->fence_regs[i];
/* * Ideally we want to assert that the fence register is not * live at this point (i.e. that no piece of code will be * trying to write through fence + GTT, as that both violates * our tracking of activity and associated locking/barriers, * but also is illegal given that the hw is powered down). * * Previously we used reg->pin_count as a "liveness" indicator. * That is not sufficient, and we need a more fine-grained * tool if we want to have a sanity check here.
*/
if (flags & PIN_MAPPABLE &&
(!view || view->type == I915_GTT_VIEW_NORMAL)) { /* * If the required space is larger than the available * aperture, we will not able to find a slot for the * object and unbinding the object now will be in * vain. Worse, doing so may cause us to ping-pong * the object in and out of the Global GTT and * waste a lot of cycles under the mutex.
*/ if (obj->base.size > ggtt->mappable_end) return ERR_PTR(-E2BIG);
/* * If NONBLOCK is set the caller is optimistically * trying to cache the full object within the mappable * aperture, and *must* have a fallback in place for * situations where we cannot bind the object. We * can be a little more lax here and use the fallback * more often to avoid costly migrations of ourselves * and other objects within the aperture. * * Half-the-aperture is used as a simple heuristic. * More interesting would to do search for a free * block prior to making the commitment to unbind. * That caters for the self-harm case, and with a * little more heuristics (e.g. NOFAULT, NOEVICT) * we could try to minimise harm to others.
*/ if (flags & PIN_NONBLOCK &&
obj->base.size > ggtt->mappable_end / 2) return ERR_PTR(-ENOSPC);
}
new_vma:
vma = i915_vma_instance(obj, &ggtt->vm, view); if (IS_ERR(vma)) return vma;
if (i915_vma_misplaced(vma, size, alignment, flags)) { if (flags & PIN_NONBLOCK) { if (i915_vma_is_pinned(vma) || i915_vma_is_active(vma)) return ERR_PTR(-ENOSPC);
/* * If this misplaced vma is too big (i.e, at-least * half the size of aperture) or hasn't been pinned * mappable before, we ignore the misplacement when * PIN_NONBLOCK is set in order to avoid the ping-pong * issue described above. In other words, we try to * avoid the costly operation of unbinding this vma * from the GGTT and rebinding it back because there * may not be enough space for this vma in the aperture.
*/ if (flags & PIN_MAPPABLE &&
(vma->fence_size > ggtt->mappable_end / 2 ||
!i915_vma_is_map_and_fenceable(vma))) return ERR_PTR(-ENOSPC);
}
if (i915_vma_is_pinned(vma) || i915_vma_is_active(vma)) {
discard_ggtt_vma(vma); goto new_vma;
}
ret = i915_vma_unbind(vma); if (ret) return ERR_PTR(ret);
}
ret = i915_vma_pin_ww(vma, ww, size, alignment, flags | PIN_GLOBAL);
if (ret) return ERR_PTR(ret);
if (vma->fence && !i915_gem_object_is_tiled(obj)) {
mutex_lock(&ggtt->vm.mutex);
i915_vma_revoke_fence(vma);
mutex_unlock(&ggtt->vm.mutex);
}
ret = i915_vma_wait_for_bind(vma); if (ret) {
i915_vma_unpin(vma); return ERR_PTR(ret);
}
/* if the object is no longer attached, discard its backing storage */ if (obj->mm.madv == I915_MADV_DONTNEED &&
!i915_gem_object_has_pages(obj))
i915_gem_object_truncate(obj);
/*
 * A single pass should suffice to release all the freed objects (along most
 * call paths), but be a little more paranoid in that freeing the objects does
 * take a little amount of time, during which the rcu callbacks could have
 * added new objects into the freed list, and armed the work again.
 */
void i915_gem_drain_freed_objects(struct drm_i915_private *i915)
{
	/* Keep draining until no freed objects remain pending. */
	while (atomic_read(&i915->mm.free_count)) {
		flush_work(&i915->mm.free_work);
		drain_workqueue(i915->bdev.wq);
		rcu_barrier();
	}
}
/* * Similar to objects above (see i915_gem_drain_freed-objects), in general we * have workers that are armed by RCU and then rearm themselves in their * callbacks. To be paranoid, we need to drain the workqueue a second time after * waiting for the RCU grace period so that we catch work queued via RCU from * the first pass. As neither drain_workqueue() nor flush_workqueue() report a * result, we make an assumption that we only don't require more than 3 passes * to catch all _recursive_ RCU delayed work.
*/ void i915_gem_drain_workqueue(struct drm_i915_private *i915)
{ int i;
for (i = 0; i < 3; i++) {
flush_workqueue(i915->wq);
rcu_barrier();
i915_gem_drain_freed_objects(i915);
}
drain_workqueue(i915->wq);
}
int i915_gem_init(struct drm_i915_private *dev_priv)
{ struct intel_gt *gt; unsignedint i; int ret;
/* * In the process of replacing cache_level with pat_index a tricky * dependency is created on the definition of the enum i915_cache_level. * In case this enum is changed, PTE encode would be broken. * Add a WARNING here. And remove when we completely quit using this * enum.
*/
BUILD_BUG_ON(I915_CACHE_NONE != 0 ||
I915_CACHE_LLC != 1 ||
I915_CACHE_L3_LLC != 2 ||
I915_CACHE_WT != 3 ||
I915_MAX_CACHE_LEVEL != 4);
/* We need to fallback to 4K pages if host doesn't support huge gtt. */ if (intel_vgpu_active(dev_priv) && !intel_vgpu_has_huge_gtt(dev_priv))
RUNTIME_INFO(dev_priv)->page_sizes = I915_GTT_PAGE_SIZE_4K;
ret = i915_init_ggtt(dev_priv); if (ret) {
GEM_BUG_ON(ret == -EIO); goto err_unlock;
}
/* * Despite its name intel_clock_gating_init applies both display * clock gating workarounds; GT mmio workarounds and the occasional * GT power context workaround. Worse, sometimes it includes a context * register workaround which we need to apply before we record the * default HW state for all contexts. * * FIXME: break up the workarounds and apply them at the right time!
*/
intel_clock_gating_init(dev_priv);
for_each_gt(gt, dev_priv, i) {
ret = intel_gt_init(gt); if (ret) goto err_unlock;
}
/* * Register engines early to ensure the engine list is in its final * rb-tree form, lowering the amount of code that has to deal with * the intermediate llist state.
*/
intel_engines_driver_register(dev_priv);
return 0;
/* * Unwinding is complicated by that we want to handle -EIO to mean * disable GPU submission but keep KMS alive. We want to mark the * HW as irrevisibly wedged, but keep enough state around that the * driver doesn't explode during runtime.
*/
err_unlock:
i915_gem_drain_workqueue(dev_priv);
if (ret == -EIO) { /* * Allow engines or uC initialisation to fail by marking the GPU * as wedged. But we only want to do this when the GPU is angry, * for all other failure, such as an allocation failure, bail.
*/
for_each_gt(gt, dev_priv, i) { if (!intel_gt_is_wedged(gt)) {
i915_probe_error(dev_priv, "Failed to initialize GPU, declaring it wedged!\n");
intel_gt_set_wedged(gt);
}
}
/* Minimal basic recovery for KMS */
ret = i915_ggtt_enable_hw(dev_priv);
i915_ggtt_resume(to_gt(dev_priv)->ggtt);
intel_clock_gating_init(dev_priv);
}
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit noch Richtigkeit
noch Qualität der bereitgestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.