/** * DOC: Command list validator for VC4. * * Since the VC4 has no IOMMU between it and system memory, a user * with access to execute command lists could escalate privilege by * overwriting system memory (drawing to it as a framebuffer) or * reading system memory it shouldn't (reading it as a vertex buffer * or index buffer) * * We validate binner command lists to ensure that all accesses are * within the bounds of the GEM objects referenced by the submitted * job. It explicitly whitelists packets, and looks at the offsets in * any address fields to make sure they're contained within the BOs * they reference. * * Note that because CL validation is already reading the * user-submitted CL and writing the validated copy out to the memory * that the GPU will actually read, this is also where GEM relocation * processing (turning BO references into actual addresses for the GPU * to use) happens.
*/
/** Return the width in pixels of a 64-byte microtile. */
static uint32_t
utile_width(int cpp)
{
	if (cpp == 1 || cpp == 2)
		return 8;
	if (cpp == 4)
		return 4;
	if (cpp == 8)
		return 2;

	pr_err("unknown cpp: %d\n", cpp);
	return 1;
}
/** Return the height in pixels of a 64-byte microtile. */
static uint32_t
utile_height(int cpp)
{
	if (cpp == 1)
		return 8;
	if (cpp == 2 || cpp == 4 || cpp == 8)
		return 4;

	pr_err("unknown cpp: %d\n", cpp);
	return 1;
}
/**
 * size_is_lt() - Returns whether a miplevel of the given size will
 * use the lineartile (LT) tiling layout rather than the normal T
 * tiling layout.
 * @width: Width in pixels of the miplevel
 * @height: Height in pixels of the miplevel
 * @cpp: Bytes per pixel of the pixel format
 */
static bool
size_is_lt(uint32_t width, uint32_t height, int cpp)
{
	/* A level is LT when either dimension fits within 4 microtiles. */
	return (width <= 4 * utile_width(cpp) ||
		height <= 4 * utile_height(cpp));
}
/* NOTE(review): fragment of a BO-lookup helper (presumably vc4_use_bo);
 * the function signature is outside this chunk — confirm against the
 * full file.
 */
/* CL validation only exists for the VC4 (pre-V3D 4.x) generations. */
if (WARN_ON_ONCE(vc4->gen > VC4_GEN_4)) return NULL;
/* Reject handle indices beyond the BO table submitted with this job. */
if (hindex >= exec->bo_count) {
DRM_DEBUG("BO index %d greater than BO count %d\n",
hindex, exec->bo_count); return NULL;
}
obj = to_drm_gem_dma_obj(exec->bo[hindex]);
bo = to_vc4_bo(&obj->base);
/* A BO validated as a shader must not be reachable as a generic
 * buffer, or its shader validation could be bypassed.
 */
if (bo->validated_shader) {
DRM_DEBUG("Trying to use shader BO as something other than " "a shader\n"); return NULL;
}
/* NOTE(review): fragment of a surface-dimension check; enclosing
 * function signature is outside this chunk.  "returnfalse" below is a
 * pre-existing text artifact in this chunk, left untouched here.
 */
if (WARN_ON_ONCE(vc4->gen > VC4_GEN_4)) returnfalse;
/* The shaded vertex format stores signed 12.4 fixed point * (-2048,2047) offsets from the viewport center, so we should * never have a render target larger than 4096. The texture * unit can only sample from 2048x2048, so it's even more * restricted. This lets us avoid worrying about overflow in * our math.
 */ if (width > 4096 || height > 4096) {
DRM_DEBUG("Surface dimensions (%d,%d) too large",
width, height); returnfalse;
}
/* NOTE(review): fragment of tile-binning configuration validation;
 * surrounding function is outside this chunk.
 */
/* Grab a slot of binner memory; interrupted waits are not reported as
 * driver errors since userspace will simply retry the submit.
 */
bin_slot = vc4_v3d_get_bin_slot(vc4); if (bin_slot < 0) { if (bin_slot != -EINTR && bin_slot != -ERESTARTSYS) {
drm_err(dev, "Failed to allocate binner memory: %d\n",
bin_slot);
} return bin_slot;
}
/* The slot we allocated will only be used by this job, and is * free when the job completes rendering.
 */
exec->bin_slots |= BIT(bin_slot);
bin_addr = vc4->bin_bo->base.dma_addr + bin_slot * vc4->bin_alloc_size;
/* The tile state data array is 48 bytes per tile, and we put it at * the start of a BO containing both it and the tile alloc.
 */
tile_state_size = 48 * tile_count;
/* Since the tile alloc array will follow us, align. */
exec->tile_alloc_offset = bin_addr + roundup(tile_state_size, 4096);
/* NOTE(review): fragment of the whitelisted-packet table; the array
 * declaration and VC4_DEFINE_PACKET macro are outside this chunk.
 * NULL means the packet needs no relocation/validation callback.
 */
VC4_DEFINE_PACKET(VC4_PACKET_CONFIGURATION_BITS, NULL),
VC4_DEFINE_PACKET(VC4_PACKET_FLAT_SHADE_FLAGS, NULL),
VC4_DEFINE_PACKET(VC4_PACKET_POINT_SIZE, NULL),
VC4_DEFINE_PACKET(VC4_PACKET_LINE_WIDTH, NULL),
VC4_DEFINE_PACKET(VC4_PACKET_RHT_X_BOUNDARY, NULL),
VC4_DEFINE_PACKET(VC4_PACKET_DEPTH_OFFSET, NULL),
VC4_DEFINE_PACKET(VC4_PACKET_CLIP_WINDOW, NULL),
VC4_DEFINE_PACKET(VC4_PACKET_VIEWPORT_OFFSET, NULL),
VC4_DEFINE_PACKET(VC4_PACKET_CLIPPER_XY_SCALING, NULL), /* Note: The docs say this was also 105, but it was 106 in the * initial userland code drop.
 */
VC4_DEFINE_PACKET(VC4_PACKET_CLIPPER_Z_SCALING, NULL),
/* NOTE(review): tail of the bin CL validation loop; the loop header and
 * enclosing function are outside this chunk.
 */
src_offset += info->len; /* GEM handle loading doesn't produce HW packets. */ if (cmd != VC4_PACKET_GEM_HANDLES)
dst_offset += info->len;
/* When the CL hits halt, it'll stop reading anything else. */ if (cmd == VC4_PACKET_HALT) break;
}
/* Record the end address of the validated copy for the hardware. */
exec->ct0ea = exec->ct0ca + dst_offset;
/* A bin CL that never starts tile binning would leave the tile lists
 * uninitialized for the render CL.
 */
if (!exec->found_start_tile_binning_packet) {
DRM_DEBUG("Bin CL missing VC4_PACKET_START_TILE_BINNING\n"); return -EINVAL;
}
/* The bin CL must be ended with INCREMENT_SEMAPHORE and FLUSH. The * semaphore is used to trigger the render CL to start up, and the * FLUSH is what caps the bin lists with * VC4_PACKET_RETURN_FROM_SUB_LIST (so they jump back to the main * render CL when they get called to) and actually triggers the queued * semaphore increment.
 */ if (!exec->found_increment_semaphore_packet || !exec->found_flush) {
DRM_DEBUG("Bin CL missing VC4_PACKET_INCREMENT_SEMAPHORE + " "VC4_PACKET_FLUSH\n"); return -EINVAL;
}
/* NOTE(review): fragment of texture-uniform relocation (presumably
 * reloc_tex); signature and remainder of the function are outside this
 * chunk.  "returnfalse"/"returntrue" below are pre-existing text
 * artifacts in this chunk, left untouched here.
 */
tex = vc4_use_bo(exec, texture_handle_index); if (!tex) returnfalse;
/* Direct-addressed samples are UBO reads: only bounds-check the
 * offset (p0) and the clamp size (p1) against the BO.
 */
if (sample->is_direct) {
uint32_t remaining_size = tex->base.size - p0;
if (p0 > tex->base.size - 4) {
DRM_DEBUG("UBO offset greater than UBO size\n"); goto fail;
} if (p1 > remaining_size - 4) {
DRM_DEBUG("UBO clamp would allow reads " "outside of UBO\n"); goto fail;
}
*validated_p0 = tex->dma_addr + p0; returntrue;
}
/* A width/height field of 0 encodes the hardware maximum of 2048. */
if (width == 0)
width = 2048; if (height == 0)
height = 2048;
/* Cube maps must supply exactly one cube-map stride, in either the
 * p2 or p3 parameter word.
 */
if (p0 & VC4_TEX_P0_CMMODE_MASK) { if (VC4_GET_FIELD(p2, VC4_TEX_P2_PTYPE) ==
VC4_TEX_P2_PTYPE_CUBE_MAP_STRIDE)
cube_map_stride = p2 & VC4_TEX_P2_CMST_MASK; if (VC4_GET_FIELD(p3, VC4_TEX_P2_PTYPE) ==
VC4_TEX_P2_PTYPE_CUBE_MAP_STRIDE) { if (cube_map_stride) {
DRM_DEBUG("Cube map stride set twice\n"); goto fail;
}
cube_map_stride = p3 & VC4_TEX_P2_CMST_MASK;
} if (!cube_map_stride) {
DRM_DEBUG("Cube map stride not set\n"); goto fail;
}
}
/* The 5-bit texture type is split across p0 (low 4 bits) and p1
 * (bit 4).
 */
type = (VC4_GET_FIELD(p0, VC4_TEX_P0_TYPE) |
(VC4_GET_FIELD(p1, VC4_TEX_P1_TYPE4) << 4));
/* Map the format to bytes-per-pixel; unsupported formats fail
 * validation rather than being guessed at.
 */
switch (type) { case VC4_TEXTURE_TYPE_RGBA8888: case VC4_TEXTURE_TYPE_RGBX8888: case VC4_TEXTURE_TYPE_RGBA32R:
cpp = 4; break; case VC4_TEXTURE_TYPE_RGBA4444: case VC4_TEXTURE_TYPE_RGBA5551: case VC4_TEXTURE_TYPE_RGB565: case VC4_TEXTURE_TYPE_LUMALPHA: case VC4_TEXTURE_TYPE_S16F: case VC4_TEXTURE_TYPE_S16:
cpp = 2; break; case VC4_TEXTURE_TYPE_LUMINANCE: case VC4_TEXTURE_TYPE_ALPHA: case VC4_TEXTURE_TYPE_S8:
cpp = 1; break; case VC4_TEXTURE_TYPE_ETC1: /* ETC1 is arranged as 64-bit blocks, where each block is 4x4 * pixels.
 */
cpp = 8;
width = (width + 3) >> 2;
height = (height + 3) >> 2; break; case VC4_TEXTURE_TYPE_BW1: case VC4_TEXTURE_TYPE_A4: case VC4_TEXTURE_TYPE_A1: case VC4_TEXTURE_TYPE_RGBA64: case VC4_TEXTURE_TYPE_YUV422R: default:
DRM_DEBUG("Texture format %d unsupported\n", type); goto fail;
}
utile_w = utile_width(cpp);
utile_h = utile_height(cpp);
/* The mipmap levels are stored before the base of the texture. Make * sure there is actually space in the BO.
 */ for (i = 1; i <= miplevels; i++) {
uint32_t level_width = max(width >> i, 1u);
uint32_t level_height = max(height >> i, 1u);
uint32_t aligned_width, aligned_height;
uint32_t level_size;
/* Once the levels get small enough, they drop from T to LT. */ if (tiling_format == VC4_TILING_FORMAT_T &&
size_is_lt(level_width, level_height, cpp)) {
tiling_format = VC4_TILING_FORMAT_LT;
}
/* NOTE(review): fragment of shader-record validation (presumably
 * validate_gl_shader_rec); signature and surrounding code are outside
 * this chunk.
 */
/* Ensure the user-supplied shader rec region still holds this packet. */
if (packet_size > exec->shader_rec_size) {
DRM_DEBUG("overflowed shader recs copying %db packet " "from %d bytes left\n",
packet_size, exec->shader_rec_size); return -EINVAL;
}
/* Copy the user shader rec (pkt_u) into the validated copy (pkt_v)
 * that the GPU will actually read.
 */
pkt_u = exec->shader_rec_u;
pkt_v = exec->shader_rec_v;
memcpy(pkt_v, pkt_u, packet_size);
exec->shader_rec_u += packet_size; /* Shader recs have to be aligned to 16 bytes (due to the attribute * flags being in the low bytes), so round the next validated shader * rec address up. This should be safe, since we've got so many * relocations in a shader rec packet.
 */
BUG_ON(roundup(packet_size, 16) - packet_size > nr_relocs * 4);
exec->shader_rec_v += roundup(packet_size, 16);
exec->shader_rec_size -= packet_size;
/* NOTE(review): this bound uses ">" where the lookup helper above uses
 * ">=" — looks like an off-by-one allowing index == bo_count; confirm
 * against the upstream file before relying on it.
 */
for (i = 0; i < shader_reloc_count; i++) { if (src_handles[i] > exec->bo_count) {
DRM_DEBUG("Shader handle %d too big\n", src_handles[i]); return -EINVAL;
}
bo[i] = to_drm_gem_dma_obj(exec->bo[src_handles[i]]); if (!bo[i]) return -EINVAL;
} for (i = shader_reloc_count; i < nr_relocs; i++) {
bo[i] = vc4_use_bo(exec, src_handles[i]); if (!bo[i]) return -EINVAL;
}
/* The CL's threading flag (low bits of the shader rec header) must
 * agree with how the fragment shader was validated.
 */
if (((*(uint16_t *)pkt_u & VC4_SHADER_FLAG_FS_SINGLE_THREAD) == 0) !=
to_vc4_bo(&bo[0]->base)->validated_shader->is_threaded) {
DRM_DEBUG("Thread mode of CL and FS do not match\n"); return -EINVAL;
}
if (to_vc4_bo(&bo[1]->base)->validated_shader->is_threaded ||
to_vc4_bo(&bo[2]->base)->validated_shader->is_threaded) {
DRM_DEBUG("cs and vs cannot be threaded\n"); return -EINVAL;
}
/* Relocate each shader's texture samples and uniform-address slots. */
for (i = 0; i < shader_reloc_count; i++) { struct vc4_validated_shader_info *validated_shader;
uint32_t o = shader_reloc_offsets[i];
uint32_t src_offset = *(uint32_t *)(pkt_u + o);
uint32_t *texture_handles_u; void *uniform_data_u;
uint32_t tex, uni;
for (tex = 0;
tex < validated_shader->num_texture_samples;
tex++) { if (!reloc_tex(exec,
uniform_data_u,
&validated_shader->texture_samples[tex],
texture_handles_u[tex],
i == 2)) { return -EINVAL;
}
}
/* Fill in the uniform slots that need this shader's * start-of-uniforms address (used for resetting the uniform * stream in the presence of control flow).
 */ for (uni = 0;
uni < validated_shader->num_uniform_addr_offsets;
uni++) {
uint32_t o = validated_shader->uniform_addr_offsets[uni];
((uint32_t *)exec->uniforms_v)[o] = exec->uniforms_p;
}
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit noch Richtigkeit
noch Qualität der bereitgestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.