/* Per-GT EU stall sampling state, embedded in struct xe_gt. */
struct xe_eu_stall_gt {
	/* Lock to protect stream */
	struct mutex stream_lock;
	/* EU stall data stream */
	struct xe_eu_stall_data_stream *stream;
	/* Workqueue to schedule buffer pointers polling work */
	struct workqueue_struct *buf_ptr_poll_wq;
};
/**
 * struct eu_stall_open_properties - EU stall sampling properties received
 *				     from user space at open.
 *
 * @sampling_rate_mult: EU stall sampling rate multiplier.
 *			HW will sample every (sampling_rate_mult x 251) cycles.
 * @wait_num_reports: Minimum number of EU stall data reports to unblock poll().
 * @gt: GT on which EU stall data will be captured.
 */
struct eu_stall_open_properties {
	int sampling_rate_mult;
	int wait_num_reports;
	struct xe_gt *gt;
};
/*
 * EU stall data format for PVC.
 *
 * Bit ranges are noted per field; the bitfields plus unused_bits make up
 * 128 bits, and unused[6] pads the record out to 64 bytes total.
 */
struct xe_eu_stall_data_pvc {
	__u64 ip_addr:29;	  /* Bits 0 to 28 */
	__u64 active_count:8;	  /* Bits 29 to 36 */
	__u64 other_count:8;	  /* Bits 37 to 44 */
	__u64 control_count:8;	  /* Bits 45 to 52 */
	__u64 pipestall_count:8;  /* Bits 53 to 60 */
	__u64 send_count:8;	  /* Bits 61 to 68 */
	__u64 dist_acc_count:8;	  /* Bits 69 to 76 */
	__u64 sbid_count:8;	  /* Bits 77 to 84 */
	__u64 sync_count:8;	  /* Bits 85 to 92 */
	__u64 inst_fetch_count:8; /* Bits 93 to 100 */
	__u64 unused_bits:27;
	__u64 unused[6];
} __packed;
/*
 * EU stall data format for Xe2 arch GPUs (LNL, BMG).
 *
 * Bit ranges are noted per field; the bitfields plus unused_bits make up
 * 128 bits, and unused[6] pads the record out to 64 bytes total.
 */
struct xe_eu_stall_data_xe2 {
	__u64 ip_addr:29;	  /* Bits 0 to 28 */
	__u64 tdr_count:8;	  /* Bits 29 to 36 */
	__u64 other_count:8;	  /* Bits 37 to 44 */
	__u64 control_count:8;	  /* Bits 45 to 52 */
	__u64 pipestall_count:8;  /* Bits 53 to 60 */
	__u64 send_count:8;	  /* Bits 61 to 68 */
	__u64 dist_acc_count:8;	  /* Bits 69 to 76 */
	__u64 sbid_count:8;	  /* Bits 77 to 84 */
	__u64 sync_count:8;	  /* Bits 85 to 92 */
	__u64 inst_fetch_count:8; /* Bits 93 to 100 */
	__u64 active_count:8;	  /* Bits 101 to 108 */
	__u64 ex_id:3;		  /* Bits 109 to 111 */
	__u64 end_flag:1;	  /* Bit 112 */
	__u64 unused_bits:15;
	__u64 unused[6];
} __packed;
/**
 * xe_eu_stall_get_sampling_rates - get EU stall sampling rates information.
 *
 * @num_rates: Pointer to a u32 to return the number of sampling rates.
 * @rates: double u64 pointer to point to an array of sampling rates.
 *
 * Stores the number of sampling rates and pointer to the array of
 * sampling rates in the input pointers.
 *
 * Returns: Size in bytes of the EU stall sampling rates array.
 */
size_t xe_eu_stall_get_sampling_rates(u32 *num_rates, const u64 **rates)
{
	*num_rates = ARRAY_SIZE(eu_stall_sampling_rates);
	*rates = eu_stall_sampling_rates;

	/* Was "returnsizeof(...)" — missing space made this a compile error. */
	return sizeof(eu_stall_sampling_rates);
}
/**
 * xe_eu_stall_get_per_xecore_buf_size - get per XeCore buffer size.
 *
 * Returns: The per XeCore buffer size used to allocate the per GT
 *	    EU stall data buffer.
 */
size_t xe_eu_stall_get_per_xecore_buf_size(void)
{
	return per_xecore_buf_size;
}
/** * xe_eu_stall_data_record_size - get EU stall data record size. * * @xe: Pointer to a Xe device. * * Returns: EU stall data record size.
*/
size_t xe_eu_stall_data_record_size(struct xe_device *xe)
{
size_t record_size = 0;
/*
 * num_data_rows - Return the number of EU stall data rows of 64B each
 *		   for a given data size.
 *
 * @data_size: EU stall data size
 */
static u32 num_data_rows(u32 data_size)
{
	/* Each data row is 64 bytes. */
	return data_size / 64;
}
/** * xe_eu_stall_init() - Allocate and initialize GT level EU stall data * structure xe_eu_stall_gt within struct xe_gt. * * @gt: GT being initialized. * * Returns: zero on success or a negative error code.
*/ int xe_eu_stall_init(struct xe_gt *gt)
{ struct xe_device *xe = gt_to_xe(gt); int ret;
if (!xe_eu_stall_supported_on_platform(xe)) return 0;
gt->eu_stall = kzalloc(sizeof(*gt->eu_stall), GFP_KERNEL); if (!gt->eu_stall) {
ret = -ENOMEM; gotoexit;
}
mutex_init(>->eu_stall->stream_lock);
gt->eu_stall->buf_ptr_poll_wq = alloc_ordered_workqueue("xe_eu_stall", 0); if (!gt->eu_stall->buf_ptr_poll_wq) {
ret = -ENOMEM; goto exit_free;
}
if (ext.next_extension) return xe_eu_stall_user_extensions(xe, ext.next_extension, ++ext_number, props);
return 0;
}
/** * buf_data_size - Calculate the number of bytes in a circular buffer * given the read and write pointers and the size of * the buffer. * * @buf_size: Size of the circular buffer * @read_ptr: Read pointer with an additional overflow bit * @write_ptr: Write pointer with an additional overflow bit * * Since the read and write pointers have an additional overflow bit, * this function calculates the offsets from the pointers and use the * offsets to calculate the data size in the buffer. * * Returns: number of bytes of data in the buffer
*/ static u32 buf_data_size(size_t buf_size, u32 read_ptr, u32 write_ptr)
{
u32 read_offset, write_offset, size = 0;
/* On PVC, the overflow bit has to be cleared by writing 1 to it. * On Xe2 and later GPUs, the bit has to be cleared by writing 0 to it.
*/ if (GRAPHICS_VER(xe) >= 20)
write_ptr_reg = _MASKED_BIT_DISABLE(XEHPC_EUSTALL_REPORT_OVERFLOW_DROP); else
write_ptr_reg = _MASKED_BIT_ENABLE(XEHPC_EUSTALL_REPORT_OVERFLOW_DROP);
/* Hardware increments the read and write pointers such that they can * overflow into one additional bit. For example, a 256KB size buffer * offset pointer needs 18 bits. But HW uses 19 bits for the read and * write pointers. This technique avoids wasting a slot in the buffer. * Read and write offsets are calculated from the pointers in order to * check if the write pointer has wrapped around the array.
*/
xecore_buf = &stream->xecore_buf[xecore];
xecore_start_vaddr = xecore_buf->vaddr;
read_ptr = xecore_buf->read;
write_ptr = xecore_buf->write;
buf_size = stream->per_xecore_buf_size;
read_data_size = buf_data_size(buf_size, read_ptr, write_ptr); /* Read only the data that the user space buffer can accommodate */
read_data_size = min_t(size_t, count - *total_data_size, read_data_size); if (read_data_size == 0) goto exit_drop;
/* Read pointer can overflow into one additional bit */
read_ptr &= (buf_size << 1) - 1;
read_ptr_reg = REG_FIELD_PREP(XEHPC_EUSTALL_REPORT1_READ_PTR_MASK, (read_ptr >> 6));
read_ptr_reg = _MASKED_FIELD(XEHPC_EUSTALL_REPORT1_READ_PTR_MASK, read_ptr_reg);
xe_gt_mcr_unicast_write(gt, XEHPC_EUSTALL_REPORT1, read_ptr_reg, group, instance);
xecore_buf->read = read_ptr;
trace_xe_eu_stall_data_read(group, instance, read_ptr, write_ptr,
read_data_size, *total_data_size);
exit_drop: /* Clear drop bit (if set) after any data was read or if the buffer was empty. * Drop bit can be set even if the buffer is empty as the buffer may have been emptied * in the previous read() and the data drop bit was set during the previous read().
*/ if (test_bit(xecore, stream->data_drop.mask)) {
clear_dropped_eviction_line_bit(gt, group, instance);
clear_bit(xecore, stream->data_drop.mask);
} return 0;
}
/** * xe_eu_stall_stream_read_locked - copy EU stall counters data from the * per xecore buffers to the userspace buffer * @stream: A stream opened for EU stall count metrics * @file: An xe EU stall data stream file * @buf: destination buffer given by userspace * @count: the number of bytes userspace wants to read * * Returns: Number of bytes copied or a negative error code * If we've successfully copied any data then reporting that takes * precedence over any internal error status, so the data isn't lost.
*/ static ssize_t xe_eu_stall_stream_read_locked(struct xe_eu_stall_data_stream *stream, struct file *file, char __user *buf,
size_t count)
{ struct xe_gt *gt = stream->gt;
size_t total_size = 0;
u16 group, instance; unsignedint xecore; int ret = 0;
mutex_lock(&stream->xecore_buf_lock); if (bitmap_weight(stream->data_drop.mask, XE_MAX_DSS_FUSE_BITS)) { if (!stream->data_drop.reported_to_user) {
stream->data_drop.reported_to_user = true;
xe_gt_dbg(gt, "EU stall data dropped in XeCores: %*pb\n",
XE_MAX_DSS_FUSE_BITS, stream->data_drop.mask);
mutex_unlock(&stream->xecore_buf_lock); return -EIO;
}
stream->data_drop.reported_to_user = false;
}
/*
 * Userspace must enable the EU stall stream with DRM_XE_OBSERVATION_IOCTL_ENABLE
 * before calling read().
 *
 * Returns: The number of bytes copied or a negative error code on failure.
 *	    -EIO if HW drops any EU stall data when the buffer is full.
 */
static ssize_t xe_eu_stall_stream_read(struct file *file, char __user *buf,
				       size_t count, loff_t *ppos)
{
	struct xe_eu_stall_data_stream *stream = file->private_data;
	struct xe_gt *gt = stream->gt;
	ssize_t ret, aligned_count;

	/* Only whole records are copied out; a buffer smaller than one record is an error. */
	aligned_count = ALIGN_DOWN(count, stream->data_record_size);
	if (aligned_count == 0)
		return -EINVAL;

	if (!stream->enabled) {
		xe_gt_dbg(gt, "EU stall data stream not enabled to read\n");
		return -EINVAL;
	}

	if (!(file->f_flags & O_NONBLOCK)) {
		/* Blocking read: wait for data, retry while the locked read reports -EAGAIN. */
		do {
			ret = wait_event_interruptible(stream->poll_wq, stream->pollin);
			if (ret)
				return -EINTR;

			/* Was "mutex_lock(>->...)" — "&gt" mangled by HTML-entity stripping. */
			mutex_lock(&gt->eu_stall->stream_lock);
			ret = xe_eu_stall_stream_read_locked(stream, file, buf, aligned_count);
			mutex_unlock(&gt->eu_stall->stream_lock);
		} while (ret == -EAGAIN);
	} else {
		mutex_lock(&gt->eu_stall->stream_lock);
		ret = xe_eu_stall_stream_read_locked(stream, file, buf, aligned_count);
		mutex_unlock(&gt->eu_stall->stream_lock);
	}

	/*
	 * This may not work correctly if the user buffer is very small.
	 * We don't want to block the next read() when there is data in the buffer
	 * now, but couldn't be accommodated in the small user buffer.
	 */
	stream->pollin = false;

	return ret;
}
/* Take runtime pm ref and forcewake to disable RC6 */
xe_pm_runtime_get(gt_to_xe(gt));
fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_RENDER); if (!xe_force_wake_ref_has_domain(fw_ref, XE_FW_RENDER)) {
xe_gt_err(gt, "Failed to get RENDER forcewake\n");
xe_pm_runtime_put(gt_to_xe(gt)); return -ETIMEDOUT;
}
if (XE_WA(gt, 22016596838))
xe_gt_mcr_multicast_write(gt, ROW_CHICKEN2,
_MASKED_BIT_ENABLE(DISABLE_DOP_GATING));
for_each_dss_steering(xecore, gt, group, instance) {
write_ptr_reg = xe_gt_mcr_unicast_read(gt, XEHPC_EUSTALL_REPORT, group, instance); /* Clear any drop bits set and not cleared in the previous session. */ if (write_ptr_reg & XEHPC_EUSTALL_REPORT_OVERFLOW_DROP)
clear_dropped_eviction_line_bit(gt, group, instance);
write_ptr = REG_FIELD_GET(XEHPC_EUSTALL_REPORT_WRITE_PTR_MASK, write_ptr_reg);
read_ptr_reg = REG_FIELD_PREP(XEHPC_EUSTALL_REPORT1_READ_PTR_MASK, write_ptr);
read_ptr_reg = _MASKED_FIELD(XEHPC_EUSTALL_REPORT1_READ_PTR_MASK, read_ptr_reg); /* Initialize the read pointer to the write pointer */
xe_gt_mcr_unicast_write(gt, XEHPC_EUSTALL_REPORT1, read_ptr_reg, group, instance);
write_ptr <<= 6;
write_ptr &= (stream->per_xecore_buf_size << 1) - 1;
xecore_buf = &stream->xecore_buf[xecore];
xecore_buf->write = write_ptr;
xecore_buf->read = write_ptr;
}
stream->data_drop.reported_to_user = false;
bitmap_zero(stream->data_drop.mask, XE_MAX_DSS_FUSE_BITS);
/* Only one session can be active at any time */ if (gt->eu_stall->stream) {
xe_gt_dbg(gt, "EU stall sampling session already active\n"); return -EBUSY;
}
stream = kzalloc(sizeof(*stream), GFP_KERNEL); if (!stream) return -ENOMEM;
gt->eu_stall->stream = stream;
stream->gt = gt;
ret = xe_eu_stall_stream_init(stream, props); if (ret) {
xe_gt_dbg(gt, "EU stall stream init failed : %d\n", ret); goto err_free;
}
stream_fd = anon_inode_getfd("[xe_eu_stall]", &fops_eu_stall, stream, f_flags); if (stream_fd < 0) {
ret = stream_fd;
xe_gt_dbg(gt, "EU stall inode get fd failed : %d\n", ret); goto err_destroy;
}
/* Take a reference on the driver that will be kept with stream_fd * until its release.
*/
drm_dev_get(>->tile->xe->drm);
/**
 * xe_eu_stall_stream_open - Open a xe EU stall data stream fd
 *
 * @dev: DRM device pointer
 * @data: pointer to first struct @drm_xe_ext_set_property in
 *	  the chain of input properties from the user space.
 * @file: DRM file pointer
 *
 * This function opens a EU stall data stream with input properties from
 * the user space.
 *
 * Returns: EU stall data stream fd on success or a negative error code.
 */
int xe_eu_stall_stream_open(struct drm_device *dev, u64 data, struct drm_file *file)
{
	struct xe_device *xe = to_xe_device(dev);
	/* Defaults; user extensions parsed below may override them. */
	struct eu_stall_open_properties props = {
		.sampling_rate_mult = 4,
		.wait_num_reports = 1,
	};
	int ret;

	if (!xe_eu_stall_supported_on_platform(xe)) {
		drm_dbg(&xe->drm, "EU stall monitoring is not supported on this platform\n");
		return -ENODEV;
	}

	if (xe_observation_paranoid && !perfmon_capable()) {
		drm_dbg(&xe->drm, "Insufficient privileges for EU stall monitoring\n");
		return -EACCES;
	}

	ret = xe_eu_stall_user_extensions(xe, data, 0, &props);
	if (ret)
		return ret;

	if (!props.gt) {
		drm_dbg(&xe->drm, "GT ID not provided for EU stall sampling\n");
		return -EINVAL;
	}

	/* Serialize open against other stream operations on this GT. */
	mutex_lock(&props.gt->eu_stall->stream_lock);
	ret = xe_eu_stall_stream_open_locked(dev, &props, file);
	mutex_unlock(&props.gt->eu_stall->stream_lock);

	return ret;
}
/*
 * NOTE(review): the following text is web-extraction residue (German site
 * boilerplate), not part of this source file — it should be removed.
 * Preserved here, commented out:
 *
 * Messung V0.5
 * Dauer der Verarbeitung: 0.16 Sekunden (vorverarbeitet)
 * Die Informationen auf dieser Webseite wurden nach bestem Wissen
 * sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch
 * Richtigkeit, noch Qualität der bereit gestellten Informationen zugesichert.
 * Bemerkung: Die farbliche Syntaxdarstellung und die Messung sind noch
 * experimentell.
 */