/* * Emits a PIPE_CONTROL with a non-zero post-sync operation, for * implementing two workarounds on gen6. From section 1.4.7.1 * "PIPE_CONTROL" of the Sandy Bridge PRM volume 2 part 1: * * [DevSNB-C+{W/A}] Before any depth stall flush (including those * produced by non-pipelined state commands), software needs to first * send a PIPE_CONTROL with no bits set except Post-Sync Operation != * 0. * * [Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache Flush Enable * =1, a PIPE_CONTROL with any non-zero post-sync-op is required. * * And the workaround for these two requires this workaround first: * * [Dev-SNB{W/A}]: Pipe-control with CS-stall bit set must be sent * BEFORE the pipe-control with a post-sync op and no write-cache * flushes. * * And this last workaround is tricky because of the requirements on * that bit. From section 1.4.7.2.3 "Stall" of the Sandy Bridge PRM * volume 2 part 1: * * "1 of the following must also be set: * - Render Target Cache Flush Enable ([12] of DW1) * - Depth Cache Flush Enable ([0] of DW1) * - Stall at Pixel Scoreboard ([1] of DW1) * - Depth Stall ([13] of DW1) * - Post-Sync Operation ([13] of DW1) * - Notify Enable ([8] of DW1)" * * The cache flushes require the workaround flush that triggered this * one, so we can't use it. Depth stall would trigger the same. * Post-sync nonzero is what triggered this second workaround, so we * can't use that one either. Notify enable is IRQs, which aren't * really our business. That leaves only stall at scoreboard.
*/ staticint
gen6_emit_post_sync_nonzero_flush(struct i915_request *rq)
{
u32 scratch_addr =
intel_gt_scratch_offset(rq->engine->gt,
INTEL_GT_SCRATCH_FIELD_RENDER_FLUSH);
u32 *cs;
cs = intel_ring_begin(rq, 6); if (IS_ERR(cs)) return PTR_ERR(cs);
int gen6_emit_flush_rcs(struct i915_request *rq, u32 mode)
{
u32 scratch_addr =
intel_gt_scratch_offset(rq->engine->gt,
INTEL_GT_SCRATCH_FIELD_RENDER_FLUSH);
u32 *cs, flags = 0; int ret;
/* Force SNB workarounds for PIPE_CONTROL flushes */
ret = gen6_emit_post_sync_nonzero_flush(rq); if (ret) return ret;
/* * Just flush everything. Experiments have shown that reducing the * number of bits based on the write domains has little performance * impact. And when rearranging requests, the order of flushes is * unknown.
*/ if (mode & EMIT_FLUSH) {
flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH; /* * Ensure that any following seqno writes only happen * when the render cache is indeed flushed.
*/
flags |= PIPE_CONTROL_CS_STALL;
} if (mode & EMIT_INVALIDATE) {
flags |= PIPE_CONTROL_TLB_INVALIDATE;
flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE; /* * TLB invalidate requires a post-sync write.
*/
flags |= PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_CS_STALL;
}
cs = intel_ring_begin(rq, 4); if (IS_ERR(cs)) return PTR_ERR(cs);
cs = intel_ring_begin(rq, 4); if (IS_ERR(cs)) return PTR_ERR(cs);
cmd = MI_FLUSH_DW;
/* * We always require a command barrier so that subsequent * commands, such as breadcrumb interrupts, are strictly ordered * wrt the contents of the write cache being flushed to memory * (and thus being coherent from the CPU).
*/
cmd |= MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW;
/* * Bspec vol 1c.3 - blitter engine command streamer: * "If ENABLED, all TLBs will be invalidated once the flush * operation is complete. This bit is only valid when the * Post-Sync Operation field is a value of 1h or 3h."
*/
cmd |= flags;
/* * Ensure that any following seqno writes only happen when the render * cache is indeed flushed. * * Workaround: 4th PIPE_CONTROL command (except the ones with only * read-cache invalidate bits set) must have the CS_STALL bit set. We * don't try to be clever and just set it unconditionally.
*/
flags |= PIPE_CONTROL_CS_STALL;
/* * CS_STALL suggests at least a post-sync write.
*/
flags |= PIPE_CONTROL_QW_WRITE;
flags |= PIPE_CONTROL_GLOBAL_GTT_IVB;
/* * Just flush everything. Experiments have shown that reducing the * number of bits based on the write domains has little performance * impact.
*/ if (mode & EMIT_FLUSH) {
flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
flags |= PIPE_CONTROL_FLUSH_ENABLE;
} if (mode & EMIT_INVALIDATE) {
flags |= PIPE_CONTROL_TLB_INVALIDATE;
flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
flags |= PIPE_CONTROL_MEDIA_STATE_CLEAR;
/* * Workaround: we must issue a pipe_control with CS-stall bit * set before a pipe_control command that has the state cache * invalidate bit set.
*/
gen7_stall_cs(rq);
}
cs = intel_ring_begin(rq, 4); if (IS_ERR(cs)) return PTR_ERR(cs);
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereit gestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.