/* * The tail pointer increases in 64 byte (cacheline size), not in report_size * increments. Also report size may not be a power of 2. Compute potential * partially landed report in OA buffer.
*/
partial_report_size = xe_oa_circ_diff(stream, hw_tail, stream->oa_buffer.tail);
partial_report_size %= report_size;
/* Subtract partial amount off the tail */
hw_tail = xe_oa_circ_diff(stream, hw_tail, partial_report_size);
tail = hw_tail;
/* * Walk the stream backward until we find a report with report id and timestamp * not 0. We can't tell whether a report has fully landed in memory before the * report id and timestamp of the following report have landed. * * This is assuming that the writes of the OA unit land in memory in the order * they were written. If not : (╯°□°)╯︵ ┻━┻
*/ while (xe_oa_circ_diff(stream, tail, stream->oa_buffer.tail) >= report_size) { void *report = stream->oa_buffer.vaddr + tail;
if (oa_report_id(stream, report) || oa_timestamp(stream, report)) break;
/* * If oa buffer size is more than 16MB (exponent greater than 24), the * oa buffer size field is multiplied by 8 in xe_oa_enable_metric_set.
*/
oa_buf |= REG_FIELD_PREP(OABUFFER_SIZE_MASK,
size_exponent > 24 ? size_exponent - 20 : size_exponent - 17);
xe_mmio_write32(mmio, __oa_regs(stream)->oa_status, 0);
xe_mmio_write32(mmio, __oa_regs(stream)->oa_head_ptr,
gtt_offset & OAG_OAHEADPTR_MASK);
stream->oa_buffer.head = 0; /* * PRM says: "This MMIO must be set before the OATAILPTR register and after the * OAHEADPTR register. This is to enable proper functionality of the overflow bit".
*/
xe_mmio_write32(mmio, __oa_regs(stream)->oa_buffer, oa_buf);
xe_mmio_write32(mmio, __oa_regs(stream)->oa_tail_ptr,
gtt_offset & OAG_OATAILPTR_MASK);
/* Mark that we need updated tail pointer to read from */
stream->oa_buffer.tail = 0;
/* Zero out the OA buffer since we rely on zero report id and timestamp fields */
memset(stream->oa_buffer.vaddr, 0, xe_bo_size(stream->oa_buffer.bo));
}
/* * BSpec: 46822: Bit 0. Even if stream->sample is 0, for OAR to function, the OA * buffer must be correctly initialized
*/
xe_oa_init_oa_buffer(stream);
xe_mmio_rmw32(mmio, __oa_regs(stream)->oa_ctrl, __oactrl_used_bits(stream), 0); if (xe_mmio_wait32(mmio, __oa_regs(stream)->oa_ctrl,
OAG_OACONTROL_OA_COUNTER_ENABLE, 0, 50000, NULL, false))
drm_err(&stream->oa->xe->drm, "wait for OA to be disabled timed out\n");
if (GRAPHICS_VERx100(stream->oa->xe) <= 1270 && GRAPHICS_VERx100(stream->oa->xe) != 1260) { /* <= XE_METEORLAKE except XE_PVC */
xe_mmio_write32(mmio, OA_TLB_INV_CR, 1); if (xe_mmio_wait32(mmio, OA_TLB_INV_CR, 1, 0, 50000, NULL, false))
drm_err(&stream->oa->xe->drm, "wait for OA tlb invalidate timed out\n");
}
}
staticint xe_oa_wait_unlocked(struct xe_oa_stream *stream)
{ /* We might wait indefinitely if periodic sampling is not enabled */ if (!stream->periodic) return -EINVAL;
staticint __xe_oa_read(struct xe_oa_stream *stream, char __user *buf,
size_t count, size_t *offset)
{ /* Only clear our bits to avoid side-effects */
stream->oa_status = xe_mmio_rmw32(&stream->gt->mmio, __oa_regs(stream)->oa_status,
OASTATUS_RELEVANT_BITS, 0); /* * Signal to userspace that there is non-zero OA status to read via * @DRM_XE_OBSERVATION_IOCTL_STATUS observation stream fd ioctl
*/ if (stream->oa_status & OASTATUS_RELEVANT_BITS) return -EIO;
/* Can't read from disabled streams */ if (!stream->enabled || !stream->sample) return -EINVAL;
if (!(file->f_flags & O_NONBLOCK)) { do {
ret = xe_oa_wait_unlocked(stream); if (ret) return ret;
mutex_lock(&stream->stream_lock);
ret = __xe_oa_read(stream, buf, count, &offset);
mutex_unlock(&stream->stream_lock);
} while (!offset && !ret);
} else {
xe_oa_buffer_check_unlocked(stream);
mutex_lock(&stream->stream_lock);
ret = __xe_oa_read(stream, buf, count, &offset);
mutex_unlock(&stream->stream_lock);
}
/* * Typically we clear pollin here in order to wait for the new hrtimer callback * before unblocking. The exception to this is if __xe_oa_read returns -ENOSPC, * which means that more OA data is available than could fit in the user provided * buffer. In this case we want the next poll() call to not block. * * Also in case of -EIO, we have already waited for data before returning * -EIO, so need to wait again
*/ if (ret != -ENOSPC && ret != -EIO)
stream->pollin = false;
/* Possible values for ret are 0, -EFAULT, -ENOSPC, -EIO, -EINVAL, ... */ return offset ?: (ret ?: -EAGAIN);
}
/* * We don't explicitly check whether there's something to read here since this * path may be hot depending on what else userspace is polling, or on the timeout * in use. We rely on hrtimer xe_oa_poll_check_timer_cb to notify us when there * are samples to read
*/ if (stream->pollin)
events |= EPOLLIN;
/* Wa_1509372804:pvc: Unset the override of GUCRC mode to enable rc6 */ if (stream->override_gucrc)
xe_gt_WARN_ON(gt, xe_guc_pc_unset_gucrc_mode(>->uc.guc.pc));
bo = xe_bo_create_pin_map(stream->oa->xe, stream->gt->tile, NULL,
size, ttm_bo_type_kernel,
XE_BO_FLAG_SYSTEM | XE_BO_FLAG_GGTT); if (IS_ERR(bo)) return PTR_ERR(bo);
stream->oa_buffer.bo = bo; /* mmap implementation requires OA buffer to be in system memory */
xe_assert(stream->oa->xe, bo->vmap.is_iomem == 0);
stream->oa_buffer.vaddr = bo->vmap.vaddr; return 0;
}
/* Look for the buffer in the already allocated BOs attached to the stream */
llist_for_each_entry(oa_bo, stream->oa_config_bos.first, node) { if (oa_bo->oa_config == oa_config &&
memcmp(oa_bo->oa_config->uuid, oa_config->uuid, sizeof(oa_config->uuid)) == 0) goto out;
}
/* Emit OA configuration batch */
fence = xe_oa_submit_bb(stream, XE_OA_SUBMIT_ADD_DEPS, oa_bo->bb); if (IS_ERR(fence)) {
err = PTR_ERR(fence); gotoexit;
}
/* Point of no return: initialize and set fence to signal */
spin_lock_init(&ofence->lock);
dma_fence_init(&ofence->base, &xe_oa_fence_ops, &ofence->lock, 0, 0);
for (i = 0; i < stream->num_syncs; i++) { if (stream->syncs[i].flags & DRM_XE_SYNC_FLAG_SIGNAL)
num_signal++;
xe_sync_entry_signal(&stream->syncs[i], &ofence->base);
}
/* Additional dma_fence_get in case we dma_fence_wait */ if (!num_signal)
dma_fence_get(&ofence->base);
/* Update last fence too before adding callback */
xe_oa_update_last_fence(stream, fence);
/* Add job fence callback to schedule work to signal ofence->base */
err = dma_fence_add_callback(fence, &ofence->cb, xe_oa_config_cb);
xe_gt_assert(stream->gt, !err || err == -ENOENT); if (err == -ENOENT)
xe_oa_config_cb(fence, &ofence->cb);
/* If nothing needs to be signaled we wait synchronously */ if (!num_signal) {
dma_fence_wait(&ofence->base, false);
dma_fence_put(&ofence->base);
}
/* Done with syncs */ for (i = 0; i < stream->num_syncs; i++)
xe_sync_entry_cleanup(&stream->syncs[i]);
kfree(stream->syncs);
return 0; exit:
kfree(ofence); return err;
}
static u32 oag_report_ctx_switches(conststruct xe_oa_stream *stream)
{ /* If user didn't require OA reports, ask HW not to emit ctx switch reports */ return _MASKED_FIELD(OAG_OA_DEBUG_DISABLE_CTX_SWITCH_REPORTS,
stream->sample ?
0 : OAG_OA_DEBUG_DISABLE_CTX_SWITCH_REPORTS);
}
/* * EU NOA signals behave incorrectly if EU clock gating is enabled. * Disable thread stall DOP gating and EU DOP gating.
*/ if (XE_WA(stream->gt, 1508761755)) {
xe_gt_mcr_multicast_write(stream->gt, ROW_CHICKEN,
_MASKED_BIT_ENABLE(STALL_DOP_GATING_DISABLE));
xe_gt_mcr_multicast_write(stream->gt, ROW_CHICKEN2,
_MASKED_BIT_ENABLE(DISABLE_DOP_GATING));
}
if (GRAPHICS_VER(stream->oa->xe) >= 20)
oa_debug |= /* The three bits below are needed to get PEC counters running */
OAG_OA_DEBUG_START_TRIGGER_SCOPE_CONTROL |
OAG_OA_DEBUG_DISABLE_START_TRG_2_COUNT_QUAL |
OAG_OA_DEBUG_DISABLE_START_TRG_1_COUNT_QUAL;
if (xe_observation_paranoid && !perfmon_capable()) {
drm_dbg(&stream->oa->xe->drm, "Insufficient privilege to map OA buffer\n"); return -EACCES;
}
/* Can mmap the entire OA buffer or nothing (no partial OA buffer mmaps) */ if (vma->vm_end - vma->vm_start != xe_bo_size(stream->oa_buffer.bo)) {
drm_dbg(&stream->oa->xe->drm, "Wrong mmap size, must be OA buffer size\n"); return -EINVAL;
}
/* * Only support VM_READ, enforce MAP_PRIVATE by checking for * VM_MAYSHARE, don't copy the vma on fork
*/ if (vma->vm_flags & (VM_WRITE | VM_EXEC | VM_SHARED | VM_MAYSHARE)) {
drm_dbg(&stream->oa->xe->drm, "mmap must be read only\n"); return -EINVAL;
}
vm_flags_mod(vma, VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP | VM_DONTCOPY,
VM_MAYWRITE | VM_MAYEXEC);
xe_assert(stream->oa->xe, bo->ttm.ttm->num_pages == vma_pages(vma)); for (i = 0; i < bo->ttm.ttm->num_pages; i++) {
ret = remap_pfn_range(vma, start, page_to_pfn(bo->ttm.ttm->pages[i]),
PAGE_SIZE, vma->vm_page_prot); if (ret) break;
/* * For Xe2+, when overrun mode is enabled, there are no partial reports at the end * of buffer, making the OA buffer effectively a non-power-of-2 size circular * buffer whose size, circ_size, is a multiple of the report size
*/ if (GRAPHICS_VER(stream->oa->xe) >= 20 &&
stream->oa_unit->type == DRM_XE_OA_UNIT_TYPE_OAG && stream->sample)
stream->oa_buffer.circ_size =
param->oa_buffer_size -
param->oa_buffer_size % stream->oa_buffer.format->size; else
stream->oa_buffer.circ_size = param->oa_buffer_size;
stream->oa_config = xe_oa_get_oa_config(stream->oa, param->metric_set); if (!stream->oa_config) {
drm_dbg(&stream->oa->xe->drm, "Invalid OA config id=%i\n", param->metric_set);
ret = -EINVAL; gotoexit;
}
/* * GuC reset of engines causes OA to lose configuration * state. Prevent this by overriding GUCRC mode.
*/ if (XE_WA(stream->gt, 1509372804)) {
ret = xe_guc_pc_override_gucrc_mode(>->uc.guc.pc,
SLPC_GUCRC_MODE_GUCRC_NO_RC6); if (ret) goto err_free_configs;
stream->override_gucrc = true;
}
/* Take runtime pm ref and forcewake to disable RC6 */
xe_pm_runtime_get(stream->oa->xe);
fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL)) {
ret = -ETIMEDOUT; goto err_fw_put;
}
ret = xe_oa_alloc_oa_buffer(stream, param->oa_buffer_size); if (ret) goto err_fw_put;
staticint xe_oa_stream_open_ioctl_locked(struct xe_oa *oa, struct xe_oa_open_param *param)
{ struct xe_oa_stream *stream; int stream_fd; int ret;
/* We currently only allow exclusive access */ if (param->oa_unit->exclusive_stream) {
drm_dbg(&oa->xe->drm, "OA unit already in use\n");
ret = -EBUSY; gotoexit;
}
stream = kzalloc(sizeof(*stream), GFP_KERNEL); if (!stream) {
ret = -ENOMEM; gotoexit;
}
stream->oa = oa;
ret = xe_oa_stream_init(stream, param); if (ret) goto err_free;
if (!param->disabled) {
ret = xe_oa_enable_locked(stream); if (ret) goto err_destroy;
}
stream_fd = anon_inode_getfd("[xe_oa]", &xe_oa_fops, stream, 0); if (stream_fd < 0) {
ret = stream_fd; goto err_disable;
}
/* Hold a reference on the drm device till stream_fd is released */
drm_dev_get(&stream->oa->xe->drm);
/** * xe_oa_timestamp_frequency - Return OA timestamp frequency * @gt: @xe_gt * * OA timestamp frequency = CS timestamp frequency in most platforms. On some * platforms OA unit ignores the CTC_SHIFT and the 2 timestamps differ. In such * cases, return the adjusted CS timestamp frequency to the user.
*/
u32 xe_oa_timestamp_frequency(struct xe_gt *gt)
{
u32 reg, shift;
/*
 * Convert an OA timer exponent to a sampling period in nanoseconds,
 * rounding up to the next whole nanosecond.
 */
static u64 oa_exponent_to_ns(struct xe_gt *gt, int exponent)
{
	u32 freq = xe_oa_timestamp_frequency(gt);
	u64 scaled_ticks = (2ULL << exponent) * NSEC_PER_SEC;

	/* Ceiling division: (scaled_ticks / freq) rounded up */
	return div_u64(scaled_ticks + freq - 1, freq);
}
staticbool oa_unit_supports_oa_format(struct xe_oa_open_param *param, int type)
{ switch (param->oa_unit->type) { case DRM_XE_OA_UNIT_TYPE_OAG: return type == DRM_XE_OA_FMT_TYPE_OAG || type == DRM_XE_OA_FMT_TYPE_OAR ||
type == DRM_XE_OA_FMT_TYPE_OAC || type == DRM_XE_OA_FMT_TYPE_PEC; case DRM_XE_OA_UNIT_TYPE_OAM: case DRM_XE_OA_UNIT_TYPE_OAM_SAG: return type == DRM_XE_OA_FMT_TYPE_OAM || type == DRM_XE_OA_FMT_TYPE_OAM_MPEC; default: returnfalse;
}
}
/**
 * xe_oa_unit_id - Return OA unit ID for a hardware engine
 * @hwe: @xe_hw_engine
 *
 * Return OA unit ID for a hardware engine when available
 */
u16 xe_oa_unit_id(struct xe_hw_engine *hwe)
{
	/* Engines without an attached/populated OA unit report U16_MAX */
	if (!hwe->oa_unit || !hwe->oa_unit->num_engines)
		return U16_MAX;

	return hwe->oa_unit->oa_unit_id;
}
/*
 * xe_oa_assign_hwe - pick a hardware engine for the stream's OA unit
 * @oa: the device-level OA state
 * @param: open parameters; @param->hwe is set on success
 *
 * A hwe must be assigned to stream/oa_unit for batch submissions. Selection
 * order: the exec_q's engine if one was supplied (it must belong to the
 * requested OA unit), else the first engine attached to the OA unit, else any
 * engine on the same GT with a valid oa_unit. Returns 0 on success, -EINVAL
 * if no suitable engine exists.
 */
static int xe_oa_assign_hwe(struct xe_oa *oa, struct xe_oa_open_param *param)
{
	struct xe_hw_engine *hwe;
	enum xe_hw_engine_id id;
	int ret = 0;

	/* If not provided, OA unit defaults to OA unit 0 as per uapi */
	if (!param->oa_unit)
		param->oa_unit = &xe_root_mmio_gt(oa->xe)->oa.oa_unit[0];

	/* When we have an exec_q, get hwe from the exec_q */
	if (param->exec_q) {
		param->hwe = xe_gt_hw_engine(param->exec_q->gt, param->exec_q->class,
					     param->engine_instance, true);
		if (!param->hwe || param->hwe->oa_unit != param->oa_unit)
			goto err;
		goto out;
	}

	/* Else just get the first hwe attached to the oa unit */
	for_each_hw_engine(hwe, param->oa_unit->gt, id) {
		if (hwe->oa_unit == param->oa_unit) {
			param->hwe = hwe;
			goto out;
		}
	}

	/* If we still didn't find a hwe, just get one with a valid oa_unit from the same gt */
	for_each_hw_engine(hwe, param->oa_unit->gt, id) {
		if (!hwe->oa_unit)
			continue;

		param->hwe = hwe;
		goto out;
	}

	/* Falls through here when both loops find no engine */
err:
	drm_dbg(&oa->xe->drm, "Unable to find hwe (%d, %d) for OA unit ID %d\n",
		param->exec_q ? param->exec_q->class : -1,
		param->engine_instance, param->oa_unit->oa_unit_id);
	ret = -EINVAL;
out:
	return ret;
}
/**
 * xe_oa_stream_open_ioctl - Opens an OA stream
 * @dev: @drm_device
 * @data: pointer to struct @drm_xe_oa_config
 * @file: @drm_file
 *
 * The function opens an OA stream. An OA stream, opened with specified
 * properties, enables OA counter samples to be collected, either
 * periodically (time based sampling), or on request (using OA queries)
 */
int xe_oa_stream_open_ioctl(struct drm_device *dev, u64 data, struct drm_file *file)
{
	struct xe_device *xe = to_xe_device(dev);
	struct xe_oa *oa = &xe->oa;
	struct xe_file *xef = to_xe_file(file);
	struct xe_oa_open_param param = {};
	const struct xe_oa_format *f;
	bool privileged_op = true;
	int ret;

	if (!oa->xe) {
		drm_dbg(&xe->drm, "xe oa interface not available for this system\n");
		return -ENODEV;
	}

	param.xef = xef;
	param.period_exponent = -1;
	ret = xe_oa_user_extensions(oa, XE_OA_USER_EXTN_FROM_OPEN, data, 0, &param);
	if (ret)
		return ret;

	if (param.exec_queue_id > 0) {
		param.exec_q = xe_exec_queue_lookup(xef, param.exec_queue_id);
		if (XE_IOCTL_DBG(oa->xe, !param.exec_q))
			return -ENOENT;

		if (XE_IOCTL_DBG(oa->xe, param.exec_q->width > 1))
			return -EOPNOTSUPP;
	}

	/*
	 * Query based sampling (using MI_REPORT_PERF_COUNT) with OAR/OAC,
	 * without global stream access, can be an unprivileged operation
	 */
	if (param.exec_q && !param.sample)
		privileged_op = false;

	if (param.no_preempt) {
		if (!param.exec_q) {
			drm_dbg(&oa->xe->drm, "Preemption disable without exec_q!\n");
			ret = -EINVAL;
			goto err_exec_q;
		}
		privileged_op = true;
	}

	if (privileged_op && xe_observation_paranoid && !perfmon_capable()) {
		drm_dbg(&oa->xe->drm, "Insufficient privileges to open xe OA stream\n");
		ret = -EACCES;
		goto err_exec_q;
	}

	if (!param.exec_q && !param.sample) {
		drm_dbg(&oa->xe->drm, "Only OA report sampling supported\n");
		ret = -EINVAL;
		goto err_exec_q;
	}

	ret = xe_oa_assign_hwe(oa, &param);
	if (ret)
		goto err_exec_q;

	f = &oa->oa_formats[param.oa_format];
	if (!param.oa_format || !f->size ||
	    !oa_unit_supports_oa_format(&param, f->type)) {
		drm_dbg(&oa->xe->drm, "Invalid OA format %d type %d size %d for class %d\n",
			param.oa_format, f->type, f->size, param.hwe->class);
		ret = -EINVAL;
		goto err_exec_q;
	}

	if (param.period_exponent >= 0) {
		u64 oa_period, oa_freq_hz;

		/* Requesting samples from OAG buffer is a privileged operation */
		if (!param.sample) {
			drm_dbg(&oa->xe->drm, "OA_EXPONENT specified without SAMPLE_OA\n");
			ret = -EINVAL;
			goto err_exec_q;
		}
		oa_period = oa_exponent_to_ns(param.hwe->gt, param.period_exponent);
		oa_freq_hz = div64_u64(NSEC_PER_SEC, oa_period);
		drm_dbg(&oa->xe->drm, "Using periodic sampling freq %lld Hz\n", oa_freq_hz);
	}

	if (!param.oa_buffer_size)
		param.oa_buffer_size = DEFAULT_XE_OA_BUFFER_SIZE;

	if (!param.wait_num_reports)
		param.wait_num_reports = 1;
	if (param.wait_num_reports > param.oa_buffer_size / f->size) {
		drm_dbg(&oa->xe->drm, "wait_num_reports %d\n", param.wait_num_reports);
		ret = -EINVAL;
		goto err_exec_q;
	}

	ret = xe_oa_parse_syncs(oa, &param);
	if (ret)
		goto err_exec_q;

	mutex_lock(&param.hwe->gt->oa.gt_lock);
	ret = xe_oa_stream_open_ioctl_locked(oa, &param);
	mutex_unlock(&param.hwe->gt->oa.gt_lock);
	if (ret < 0)
		goto err_sync_cleanup;

	return ret;

err_sync_cleanup:
	while (param.num_syncs--)
		xe_sync_entry_cleanup(&param.syncs[param.num_syncs]);
	kfree(param.syncs);
err_exec_q:
	if (param.exec_q)
		xe_exec_queue_put(param.exec_q);
	return ret;
}
/** * xe_oa_add_config_ioctl - Adds one OA config * @dev: @drm_device * @data: pointer to struct @drm_xe_oa_config * @file: @drm_file * * The functions adds an OA config to the set of OA configs maintained in * the kernel. The config determines which OA metrics are collected for an * OA stream.
*/ int xe_oa_add_config_ioctl(struct drm_device *dev, u64 data, struct drm_file *file)
{ struct xe_device *xe = to_xe_device(dev); struct xe_oa *oa = &xe->oa; struct drm_xe_oa_config param; struct drm_xe_oa_config *arg = ¶m; struct xe_oa_config *oa_config, *tmp; struct xe_oa_reg *regs; int err, id;
if (!oa->xe) {
drm_dbg(&xe->drm, "xe oa interface not available for this system\n"); return -ENODEV;
}
if (xe_observation_paranoid && !perfmon_capable()) {
drm_dbg(&oa->xe->drm, "Insufficient privileges to add xe OA config\n"); return -EACCES;
}
err = copy_from_user(¶m, u64_to_user_ptr(data), sizeof(param)); if (XE_IOCTL_DBG(oa->xe, err)) return -EFAULT;
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit noch Richtigkeit
noch Qualität der bereitgestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.