/* Don't forget to update vc4_bo.c: bo_type_names[] when adding to
 * this.
 */
enum vc4_kernel_bo_type {
	/* Any kernel allocation (gem_create_object hook) before it
	 * gets another type set.
	 */
	VC4_BO_TYPE_KERNEL,
	VC4_BO_TYPE_V3D,
	VC4_BO_TYPE_V3D_SHADER,
	VC4_BO_TYPE_DUMB,
	/* Binner memory (see bin_bo elsewhere in this file). */
	VC4_BO_TYPE_BIN,
	/* Render command list. */
	VC4_BO_TYPE_RCL,
	/* Binner command list. */
	VC4_BO_TYPE_BCL,
	VC4_BO_TYPE_KERNEL_CACHE,
	/* Not a real type: count of entries, used to size bo_type_names[]
	 * and as the base for user-allocated labels (see the "label"
	 * field's comment below).
	 */
	VC4_BO_TYPE_COUNT
};
/* Performance monitor object.  The perfmon lifetime is controlled by userspace
 * using perfmon related ioctls.  A perfmon can be attached to a submit_cl
 * request, and when this is the case, HW perf counters will be activated just
 * before the submit_cl is submitted to the GPU and disabled when the job is
 * done.  This way, only events related to a specific job will be counted.
 */
struct vc4_perfmon {
	struct vc4_dev *dev;

	/* Tracks the number of users of the perfmon, when this counter reaches
	 * zero the perfmon is destroyed.
	 */
	refcount_t refcnt;

	/* Number of counters activated in this perfmon instance
	 * (should be less than DRM_VC4_MAX_PERF_COUNTERS).
	 */
	u8 ncounters;

	/* Events counted by the HW perf counters. */
	u8 events[DRM_VC4_MAX_PERF_COUNTERS];

	/* Storage for counter values.  Counters are incremented by the HW
	 * perf counter values every time the perfmon is attached to a GPU job.
	 * This way, perfmon users don't have to retrieve the results after
	 * each job if they want to track events covering several submissions.
	 * Note that counter values can't be reset, but you can fake a reset by
	 * destroying the perfmon and creating a new one.
	 */
	u64 counters[] __counted_by(ncounters);
};
/* The kernel-space BO cache.  Tracks buffers that have been
 * unreferenced by all other users (refcounts of 0!) but not
 * yet freed, so we can do cheap allocations.
 */
struct vc4_bo_cache {
	/* Array of list heads for entries in the BO cache,
	 * based on number of pages, so we can do O(1) lookups
	 * in the cache when allocating.
	 */
	struct list_head *size_list;
	uint32_t size_list_size;

	/* List of all BOs in the cache, ordered by age, so we
	 * can do O(1) lookups when trying to free old
	 * buffers.
	 */
	struct list_head time_list;
	/* Deferred-free machinery: time_timer schedules time_work to
	 * reap stale cache entries from time_list.  NOTE(review):
	 * exact reap policy lives in vc4_bo.c — confirm there.
	 */
	struct work_struct time_work;
	struct timer_list time_timer;
} bo_cache;

/* Protects bo_cache and bo_labels. */
struct mutex bo_lock;
/* Purgeable BO pool.  All BOs in this pool can have their memory
 * reclaimed if the driver is unable to allocate new BOs.  We also
 * keep stats related to the purge mechanism here.
 */
struct {
	/* List of purgeable BOs, protected by @lock below. */
	struct list_head list;
	/* Number of BOs currently on @list.  (Was the fused token
	 * "unsignedint" in the mangled source; fixed to compile.)
	 */
	unsigned int num;
	/* Total size in bytes of the BOs on @list. */
	size_t size;
	/* Stats: number and total size of BOs purged so far. */
	unsigned int purged_num;
	size_t purged_size;
	/* Protects all fields of this pool. */
	struct mutex lock;
} purgeable;
/* Context value used for this device's dma_fences.  NOTE(review):
 * inferred from the name — confirm against the fence-creation site.
 */
uint64_t dma_fence_context;

/* Sequence number for the last job queued in bin_job_list.
 * Starts at 0 (no jobs emitted).
 */
uint64_t emit_seqno;

/* Sequence number for the last completed job on the GPU.
 * Starts at 0 (no jobs completed).
 */
uint64_t finished_seqno;
/* List of all struct vc4_exec_info for jobs to be executed in
 * the binner.  The first job in the list is the one currently
 * programmed into ct0ca for execution.
 */
struct list_head bin_job_list;

/* List of all struct vc4_exec_info for jobs that have
 * completed binning and are ready for rendering.  The first
 * job in the list is the one currently programmed into ct1ca
 * for execution.
 */
struct list_head render_job_list;

/* List of the finished vc4_exec_infos waiting to be freed by
 * job_done_work.
 */
struct list_head job_done_list;

/* Spinlock used to synchronize the job_list and seqno
 * accesses between the IRQ handler and GEM ioctls.
 */
spinlock_t job_lock;
wait_queue_head_t job_wait_queue;
struct work_struct job_done_work;

/* Used to track the active perfmon if any.  Access to this field is
 * protected by job_lock.
 */
struct vc4_perfmon *active_perfmon;
/* The memory used for storing binner tile alloc, tile state,
 * and overflow memory allocations.  This is freed when V3D
 * powers down.
 */
struct vc4_bo *bin_bo;

/* Size of blocks allocated within bin_bo. */
uint32_t bin_alloc_size;

/* Bitmask of the bin_alloc_size chunks in bin_bo that are
 * used.
 */
uint32_t bin_alloc_used;

/* Bitmask of the current bin_alloc used for overflow memory. */
uint32_t bin_alloc_overflow;

/* Incremented when an underrun error happened after an atomic commit.
 * This is particularly useful to detect when a specific modeset is too
 * demanding in term of memory or HVS bandwidth which is hard to guess
 * at atomic check time.
 */
atomic_t underrun;

/* Deferred work related to binner overflow memory (see
 * bin_alloc_overflow above).  NOTE(review): handler lives outside
 * this chunk — confirm what it schedules.
 */
struct work_struct overflow_mem_work;
/* Refcount of V3D power users; protected by power_lock below. */
int power_refcount;

/* Set to true when the load tracker is active. */
bool load_tracker_enabled;

/* Mutex controlling the power refcount. */
struct mutex power_lock;
/* List entry for the BO's position in either
 * vc4_exec_info->unref_list or vc4_dev->bo_cache.time_list.
 */
struct list_head unref_head;

/* Time in jiffies when the BO was put in vc4->bo_cache.  (Was the
 * fused token "unsignedlong" in the mangled source; fixed to compile.)
 */
unsigned long free_time;

/* List entry for the BO's position in vc4_dev->bo_cache.size_list. */
struct list_head size_head;

/* Struct for shader validation state, if created by
 * DRM_IOCTL_VC4_CREATE_SHADER_BO.
 */
struct vc4_validated_shader_info *validated_shader;

/* One of enum vc4_kernel_bo_type, or VC4_BO_TYPE_COUNT + i
 * for user-allocated labels.
 */
int label;

/* Count the number of active users.  This is needed to determine
 * whether we can move the BO to the purgeable list or not (when the BO
 * is used by the GPU or the display engine we can't purge it).
 */
refcount_t usecnt;

/* Store purgeable/purged state here. */
u32 madv;
struct mutex madv_lock;
};
/* Memory manager for CRTCs to allocate space in the display
 * list.  Units are dwords.
 */
struct drm_mm dlist_mm;

/* Memory manager for the LBM memory used by HVS scaling. */
struct drm_mm lbm_mm;

/* Memory manager for the UPM memory used for prefetching. */
struct drm_mm upm_mm;
struct ida upm_handles;
struct vc4_upm_refcounts upm_refcounts[VC4_NUM_UPM_HANDLES + 1];

/* NOTE(review): presumably protects the drm_mm managers above —
 * confirm against the allocation paths.
 */
spinlock_t mm_lock;

struct drm_mm_node mitchell_netravali_filter;

struct debugfs_regset32 regset;

/*
 * Even if HDMI0 on the RPi4 can output modes requiring a pixel
 * rate higher than 297MHz, it needs some adjustments in the
 * config.txt file to be able to do so and thus won't always be
 * available.
 */
bool vc5_hdmi_enable_hdmi_20;

/*
 * 4096x2160@60 requires a core overclock to work, so register
 * whether that is sufficient.
 */
bool vc5_hdmi_enable_4096by2160;
};
struct vc4_plane_state {
	struct drm_plane_state base;

	/* System memory copy of the display list for this element, computed
	 * at atomic_check time.
	 */
	u32 *dlist;

	/* Number of dwords allocated for the display list. */
	u32 dlist_size;

	/* Number of used dwords in the display list. */
	u32 dlist_count;

	/* Offset in the dlist to various words, for pageflip or
	 * cursor updates.
	 */
	u32 pos0_offset;
	u32 pos2_offset;
	u32 ptr0_offset[DRM_FORMAT_MAX_PLANES];
	u32 lbm_offset;

	/* Offset where the plane's dlist was last stored in the
	 * hardware at vc4_crtc_atomic_flush() time.
	 */
	u32 __iomem *hw_dlist;

	/* Clipped coordinates of the plane on the display. */
	int crtc_x, crtc_y, crtc_w, crtc_h;

	/* Clipped area being scanned from in the FB in u16.16 format. */
	u32 src_x, src_y;
	u32 src_w[2], src_h[2];

	/* Scaling selection for the RGB/Y plane and the Cb/Cr planes. */
	enum vc4_scaling_mode x_scaling[2], y_scaling[2];
	bool is_unity;
	bool is_yuv;

	/* Our allocation in LBM for temporary storage during scaling. */
	struct drm_mm_node lbm;

	/* The Unified Pre-Fetcher handle, one per format plane.  (Was the
	 * fused token "unsignedint" in the mangled source; fixed to compile.)
	 */
	unsigned int upm_handle[DRM_FORMAT_MAX_PLANES];

	/* Number of lines to pre-fetch. */
	unsigned int upm_buffer_lines;

	/* Set when the plane has per-pixel alpha content or does not cover
	 * the entire screen.  This is a hint to the CRTC that it might need
	 * to enable background color fill.
	 */
	bool needs_bg_fill;

	/* Mark the dlist as initialized.  Useful to avoid initializing it
	 * twice when async update is not possible.
	 */
	bool dlist_initialized;

	/* Load of this plane on the HVS block.  The load is expressed in HVS
	 * cycles/sec.
	 */
	u64 hvs_load;

	/* Memory bandwidth needed for this plane.  This is expressed in
	 * bytes/sec.
	 */
	u64 membus_load;
};
/* Timestamp at start of vblank irq - unaffected by lock delays. */
ktime_t t_vblank;

/* Per-channel 256-entry color lookup tables.  NOTE(review): presumably
 * the gamma LUT — confirm against the CRTC gamma_set path.
 */
u8 lut_r[256];
u8 lut_g[256];
u8 lut_b[256];

struct drm_pending_vblank_event *event;

struct debugfs_regset32 regset;

/**
 * @feeds_txp: True if the CRTC feeds our writeback controller.
 */
bool feeds_txp;

/**
 * @irq_lock: Spinlock protecting the resources shared between
 * the atomic code and our vblank handler.
 */
spinlock_t irq_lock;

/**
 * @current_dlist: Start offset of the display list currently
 * set in the HVS for that CRTC.  Protected by @irq_lock, and
 * copied in vc4_hvs_update_dlist() for the CRTC interrupt
 * handler to have access to that value.
 *
 * (Was the fused token "unsignedint" in the mangled source; fixed
 * to compile.)
 */
unsigned int current_dlist;

/**
 * @current_hvs_channel: HVS channel currently assigned to the
 * CRTC.  Protected by @irq_lock, and copied in
 * vc4_hvs_atomic_begin() for the CRTC interrupt handler to have
 * access to that value.
 */
unsigned int current_hvs_channel;
};
/* Read a V3D register.  Real register access is forbidden from KUnit
 * tests, so flag any such access as a test failure before performing it.
 */
#define V3D_READ(offset)						\
	({								\
		kunit_fail_current_test("Accessing a register in a unit test!\n"); \
		readl(vc4->v3d->regs + (offset));			\
	})
/* Write a V3D register; same KUnit guard as V3D_READ. */
#define V3D_WRITE(offset, val)						\
	do {								\
		kunit_fail_current_test("Accessing a register in a unit test!\n"); \
		writel(val, vc4->v3d->regs + (offset));			\
	} while (0)
/* Read an HVS register; same KUnit guard as V3D_READ. */
#define HVS_READ(offset)						\
	({								\
		kunit_fail_current_test("Accessing a register in a unit test!\n"); \
		readl(hvs->regs + (offset));				\
	})
/* Write an HVS register; same KUnit guard as V3D_READ. */
#define HVS_WRITE(offset, val)						\
	do {								\
		kunit_fail_current_test("Accessing a register in a unit test!\n"); \
		writel(val, hvs->regs + (offset));			\
	} while (0)
/* Sequence number for this bin/render job. */
uint64_t seqno;

/* NOTE(review): presumably the fence signaled on job completion —
 * confirm against the signaling site.
 */
struct dma_fence *fence;

/* Last current addresses the hardware was processing when the
 * hangcheck timer checked on us.
 */
uint32_t last_ct0ca, last_ct1ca;

/* Kernel-space copy of the ioctl arguments. */
struct drm_vc4_submit_cl *args;

/* This is the array of BOs that were looked up at the start of exec.
 * Command validation will use indices into this array.
 */
struct drm_gem_object **bo;
uint32_t bo_count;

/* List of BOs that are being written by the RCL.  Other than
 * the binner temporary storage, this is all the BOs written
 * by the job.
 */
struct drm_gem_dma_object *rcl_write_bo[4];
uint32_t rcl_write_bo_count;

/* Pointers for our position in vc4->job_list. */
struct list_head head;

/* List of other BOs used in the job that need to be released
 * once the job is complete.
 */
struct list_head unref_list;

/* Current unvalidated indices into @bo loaded by the non-hardware
 * VC4_PACKET_GEM_HANDLES.
 */
uint32_t bo_index[2];

/* This is the BO where we store the validated command lists, shader
 * records, and uniforms.
 */
struct drm_gem_dma_object *exec_bo;

/**
 * This tracks the per-shader-record state (packet 64) that
 * determines the length of the shader record and the offset
 * it's expected to be found at.  It gets read in from the
 * command lists.
 */
struct vc4_shader_state {
	uint32_t addr;
	/* Maximum vertex index referenced by any primitive using this
	 * shader state.
	 */
	uint32_t max_index;
} *shader_state;

/** How many shader states the user declared they were using. */
uint32_t shader_state_size;
/** How many shader state records the validator has seen. */
uint32_t shader_state_count;

/* Flags set by the CL validator as it encounters the corresponding
 * packets; used to reject malformed command lists.
 */
bool found_tile_binning_mode_config_packet;
bool found_start_tile_binning_packet;
bool found_increment_semaphore_packet;
bool found_flush;
uint8_t bin_tiles_x, bin_tiles_y;

/* Physical address of the start of the tile alloc array
 * (where each tile's binned CL will start).
 */
uint32_t tile_alloc_offset;
/* Bitmask of which binner slots are freed when this job completes. */
uint32_t bin_slots;

/**
 * Computed addresses pointing into exec_bo where we start the
 * bin thread (ct0) and render thread (ct1).
 */
uint32_t ct0ca, ct0ea;
uint32_t ct1ca, ct1ea;

/* Pointer to the unvalidated bin CL (if present). */
void *bin_u;

/* Pointers to the shader recs.  These paddr gets incremented as CL
 * packets are relocated in validate_gl_shader_state, and the vaddrs
 * (u and v) get incremented and size decremented as the shader recs
 * themselves are validated.
 */
void *shader_rec_u;
void *shader_rec_v;
uint32_t shader_rec_p;
uint32_t shader_rec_size;

/* Pointers to the uniform data.  These pointers are incremented, and
 * size decremented, as each batch of uniforms is uploaded.
 */
void *uniforms_u;
void *uniforms_v;
uint32_t uniforms_p;
uint32_t uniforms_size;

/* Pointer to a performance monitor object if the user requested it,
 * NULL otherwise.
 */
struct vc4_perfmon *perfmon;

/* Whether the exec has taken a reference to the binner BO, which should
 * happen with a VC4_PACKET_TILE_BINNING_MODE_CONFIG packet.
 */
bool bin_bo_used;
};
/* Per-open file private data. Any driver-specific resource that has to be * released when the DRM file is closed should be placed here.
*/ struct vc4_file { struct vc4_dev *dev;
/**
 * struct vc4_texture_sample_info - saves the offsets into the UBO for texture
 * setup parameters.
 *
 * This will be used at draw time to relocate the reference to the texture
 * contents in p0, and validate that the offset combined with
 * width/height/stride/etc. from p1 and p2/p3 doesn't sample outside the BO.
 * Note that the hardware treats unprovided config parameters as 0, so not all
 * of them need to be set up for every texture sample, and we'll store ~0 as
 * the offset to mark the unused ones.
 *
 * See the VC4 3D architecture guide page 41 ("Texture and Memory Lookup Unit
 * Setup") for definitions of the texture parameters.
 */
struct vc4_texture_sample_info {
	bool is_direct;
	/* Offsets of the p0..p3 setup words within the uniform stream;
	 * ~0 marks a parameter that was not provided.
	 */
	uint32_t p_offset[4];
};
/**
 * struct vc4_validated_shader_info - information about validated shaders that
 * needs to be used from command list validation.
 *
 * For a given shader, each time a shader state record references it, we need
 * to verify that the shader doesn't read more uniforms than the shader state
 * record's uniform BO pointer can provide, and we need to apply relocations
 * and validate the shader state record's uniforms that define the texture
 * samples.
 */
struct vc4_validated_shader_info {
	uint32_t uniforms_size;
	uint32_t uniforms_src_size;
	uint32_t num_texture_samples;
	/* NOTE(review): the closing "};" of this struct is missing from
	 * this chunk of the file — confirm against the full source.
	 */
	struct vc4_texture_sample_info *texture_samples;
/**
 * __wait_for - magic wait macro
 *
 * Macro to help avoid open coding check/wait/timeout patterns.  Note that it's
 * important that we check the condition again after having timed out, since
 * the timeout could be due to preemption or similar and we've never had a
 * chance to check the condition before the timeout.
 *
 * @OP: statement executed each iteration before checking @COND
 * @COND: expression that terminates the wait with success when true
 * @US: total timeout, in microseconds
 * @Wmin: initial per-iteration sleep, in microseconds
 * @Wmax: cap on the per-iteration sleep (doubles each round up to this)
 *
 * Evaluates to 0 on success or -ETIMEDOUT.
 */
#define __wait_for(OP, COND, US, Wmin, Wmax) ({ \
	const ktime_t end__ = ktime_add_ns(ktime_get_raw(), 1000ll * (US)); \
	long wait__ = (Wmin); /* recommended min for usleep is 10 us */ \
	int ret__; \
	might_sleep(); \
	for (;;) { \
		/* "constbool" was a fused token in the mangled source. */ \
		const bool expired__ = ktime_after(ktime_get_raw(), end__); \
		OP; \
		/* Guarantee COND check prior to timeout */ \
		barrier(); \
		if (COND) { \
			ret__ = 0; \
			break; \
		} \
		if (expired__) { \
			ret__ = -ETIMEDOUT; \
			break; \
		} \
		usleep_range(wait__, wait__ * 2); \
		if (wait__ < (Wmax)) \
			wait__ <<= 1; \
	} \
	ret__; \
})
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit noch Richtigkeit
noch Qualität der bereitgestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.