/* * Copyright (c) 2015, NVIDIA CORPORATION. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), * to deal in the Software without restriction, including without limitation * the rights to use, copy, modify, merge, publish, distribute, sublicense, * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER * DEALINGS IN THE SOFTWARE.
*/
/* * GK20A does not have dedicated video memory, and to accurately represent this * fact Nouveau will not create a RAM device for it. Therefore its instmem * implementation must be done directly on top of system memory, while * preserving coherency for read and write operations. * * Instmem can be allocated through two means: * 1) If an IOMMU unit has been probed, the IOMMU API is used to make memory * pages contiguous to the GPU. This is the preferred way. * 2) If no IOMMU unit is probed, the DMA API is used to allocate physically * contiguous memory. * * In both cases CPU read and writes are performed by creating a write-combined * mapping. The GPU L2 cache must thus be flushed/invalidated when required. To * be conservative we do this every time we acquire or release an instobj, but * ideally L2 management should be handled at a higher level. * * To improve performance, CPU mappings are not removed upon instobj release. * Instead they are placed into a LRU list to be recycled when the mapped space * goes beyond a certain threshold. At the moment this limit is 1MB.
*/ #include"priv.h"
/* * Recycle the vaddr of obj. Must be called with gk20a_instmem::lock held.
*/ staticvoid
gk20a_instobj_iommu_recycle_vaddr(struct gk20a_instobj_iommu *obj)
{ struct gk20a_instmem *imem = obj->base.imem; /* there should not be any user left... */
WARN_ON(obj->use_cpt);
list_del(&obj->vaddr_node);
vunmap(obj->base.vaddr);
obj->base.vaddr = NULL;
imem->vaddr_use -= nvkm_memory_size(&obj->base.base.memory);
nvkm_debug(&imem->base.subdev, "vaddr used: %x/%x\n", imem->vaddr_use,
imem->vaddr_max);
}
/* * Must be called while holding gk20a_instmem::lock
*/ staticvoid
gk20a_instmem_vaddr_gc(struct gk20a_instmem *imem, const u64 size)
{ while (imem->vaddr_use + size > imem->vaddr_max) { /* no candidate that can be unmapped, abort... */ if (list_empty(&imem->vaddr_lru)) break;
/* vaddr has already been recycled */ if (node->base.vaddr)
gk20a_instobj_iommu_recycle_vaddr(node);
mutex_unlock(&imem->lock);
/* clear IOMMU bit to unmap pages */
r->offset &= ~BIT(imem->iommu_bit - imem->iommu_pgshift);
/* Unmap pages from GPU address space and free them */ for (i = 0; i < node->base.mn->length; i++) {
iommu_unmap(imem->domain,
(r->offset + i) << imem->iommu_pgshift, PAGE_SIZE);
dma_unmap_page(dev, node->dma_addrs[i], PAGE_SIZE,
DMA_BIDIRECTIONAL);
__free_page(node->pages[i]);
}
/* Release area from GPU address space */
mutex_lock(imem->mm_mutex);
nvkm_mm_free(imem->mm, &r);
mutex_unlock(imem->mm_mutex);
/* Allocate backing memory */ for (i = 0; i < npages; i++) { struct page *p = alloc_page(GFP_KERNEL);
dma_addr_t dma_adr;
if (p == NULL) {
ret = -ENOMEM; goto free_pages;
}
node->pages[i] = p;
dma_adr = dma_map_page(dev, p, 0, PAGE_SIZE, DMA_BIDIRECTIONAL); if (dma_mapping_error(dev, dma_adr)) {
nvkm_error(subdev, "DMA mapping error!\n");
ret = -ENOMEM; goto free_pages;
}
node->dma_addrs[i] = dma_adr;
}
mutex_lock(imem->mm_mutex); /* Reserve area from GPU address space */
ret = nvkm_mm_head(imem->mm, 0, 1, npages, npages,
align >> imem->iommu_pgshift, &r);
mutex_unlock(imem->mm_mutex); if (ret) {
nvkm_error(subdev, "IOMMU space is full!\n"); goto free_pages;
}
/* Map into GPU address space */ for (i = 0; i < npages; i++) {
u32 offset = (r->offset + i) << imem->iommu_pgshift;
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereit gestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.