// SPDX-License-Identifier: GPL-2.0
/*
 * IOMMU API for ARM architected SMMUv3 implementations.
 *
 * Copyright (C) 2015 ARM Limited
 *
 * Author: Will Deacon <will.deacon@arm.com>
 *
 * This driver is powered by bad coffee and bombay mix.
 */
/* Publish the shadow cons pointer to the hardware CONS register. */
static void queue_sync_cons_out(struct arm_smmu_queue *q)
{
	/*
	 * Ensure that all CPU accesses (reads and writes) to the queue
	 * are complete before we update the cons pointer.
	 */
	__iomb();
	writel_relaxed(q->llq.cons, q->cons_reg);
}
staticint queue_sync_prod_in(struct arm_smmu_queue *q)
{
u32 prod; int ret = 0;
/* * We can't use the _relaxed() variant here, as we must prevent * speculative reads of the queue before we have determined that * prod has indeed moved.
*/
prod = readl(q->prod_reg);
if (Q_OVF(prod) != Q_OVF(q->llq.prod))
ret = -EOVERFLOW;
/* * Beware that Hi16xx adds an extra 32 bits of goodness to its MSI * payload, so the write will zero the entire command on that platform.
*/ if (smmu->options & ARM_SMMU_OPT_MSIPOLL) {
ent.sync.msiaddr = q->base_dma + Q_IDX(&q->llq, prod) *
q->ent_dwords * 8;
}
arm_smmu_cmdq_build_cmd(cmd, &ent); if (arm_smmu_cmdq_needs_busy_polling(smmu, cmdq))
u64p_replace_bits(cmd, CMDQ_SYNC_0_CS_NONE, CMDQ_SYNC_0_CS);
}
switch (idx) { case CMDQ_ERR_CERROR_ABT_IDX:
dev_err(smmu->dev, "retrying command fetch\n"); return; case CMDQ_ERR_CERROR_NONE_IDX: return; case CMDQ_ERR_CERROR_ATC_INV_IDX: /* * ATC Invalidation Completion timeout. CONS is still pointing * at the CMD_SYNC. Attempt to complete other pending commands * by repeating the CMD_SYNC, though we might well end up back * here since the ATC invalidation may still be pending.
*/ return; case CMDQ_ERR_CERROR_ILL_IDX: default: break;
}
/* * We may have concurrent producers, so we need to be careful * not to touch any of the shadow cmdq state.
*/
queue_read(cmd, Q_ENT(q, cons), q->ent_dwords);
dev_err(smmu->dev, "skipping command in error state:\n"); for (i = 0; i < ARRAY_SIZE(cmd); ++i)
dev_err(smmu->dev, "\t0x%016llx\n", (unsignedlonglong)cmd[i]);
/* Convert the erroneous command into a CMD_SYNC */
arm_smmu_cmdq_build_cmd(cmd, &cmd_sync); if (arm_smmu_cmdq_needs_busy_polling(smmu, cmdq))
u64p_replace_bits(cmd, CMDQ_SYNC_0_CS_NONE, CMDQ_SYNC_0_CS);
/*
 * Command queue locking.
 * This is a form of bastardised rwlock with the following major changes:
 *
 * - The only LOCK routines are exclusive_trylock() and shared_lock().
 *   Neither have barrier semantics, and instead provide only a control
 *   dependency.
 *
 * - The UNLOCK routines are supplemented with shared_tryunlock(), which
 *   fails if the caller appears to be the last lock holder (yes, this is
 *   racy). All successful UNLOCK routines have RELEASE semantics.
 */
static void arm_smmu_cmdq_shared_lock(struct arm_smmu_cmdq *cmdq)
{
	int val;

	/*
	 * We can try to avoid the cmpxchg() loop by simply incrementing the
	 * lock counter. When held in exclusive state, the lock counter is set
	 * to INT_MIN so these increments won't hurt as the value will remain
	 * negative.
	 */
	if (atomic_fetch_inc_relaxed(&cmdq->lock) >= 0)
		return;

	do {
		val = atomic_cond_read_relaxed(&cmdq->lock, VAL >= 0);
	} while (atomic_cmpxchg_relaxed(&cmdq->lock, val, val + 1) != val);
}
/* * Command queue insertion. * This is made fiddly by our attempts to achieve some sort of scalability * since there is one queue shared amongst all of the CPUs in the system. If * you like mixed-size concurrency, dependency ordering and relaxed atomics, * then you'll *love* this monstrosity. * * The basic idea is to split the queue up into ranges of commands that are * owned by a given CPU; the owner may not have written all of the commands * itself, but is responsible for advancing the hardware prod pointer when * the time comes. The algorithm is roughly: * * 1. Allocate some space in the queue. At this point we also discover * whether the head of the queue is currently owned by another CPU, * or whether we are the owner. * * 2. Write our commands into our allocated slots in the queue. * * 3. Mark our slots as valid in arm_smmu_cmdq.valid_map. * * 4. If we are an owner: * a. Wait for the previous owner to finish. * b. Mark the queue head as unowned, which tells us the range * that we are responsible for publishing. * c. Wait for all commands in our owned range to become valid. * d. Advance the hardware prod pointer. * e. Tell the next owner we've finished. * * 5. If we are inserting a CMD_SYNC (we may or may not have been an * owner), then we need to stick around until it has completed: * a. If we have MSIs, the SMMU can write back into the CMD_SYNC * to clear the first 4 bytes. * b. Otherwise, we spin waiting for the hardware cons pointer to * advance past our command. * * The devil is in the details, particularly the use of locking for handling * SYNC completion and freeing up space in the queue before we think that it is * full.
*/ staticvoid __arm_smmu_cmdq_poll_set_valid_map(struct arm_smmu_cmdq *cmdq,
u32 sprod, u32 eprod, bool set)
{
u32 swidx, sbidx, ewidx, ebidx; struct arm_smmu_ll_queue llq = {
.max_n_shift = cmdq->q.llq.max_n_shift,
.prod = sprod,
};
/* * The valid bit is the inverse of the wrap bit. This means * that a zero-initialised queue is invalid and, after marking * all entries as valid, they become invalid again when we * wrap.
*/ if (set) {
atomic_long_xor(mask, ptr);
} else { /* Poll */ unsignedlong valid;
/* Mark all entries in the range [sprod, eprod) as valid */
static void arm_smmu_cmdq_set_valid_map(struct arm_smmu_cmdq *cmdq,
					u32 sprod, u32 eprod)
{
	__arm_smmu_cmdq_poll_set_valid_map(cmdq, sprod, eprod, true);
}
/* Wait for all entries in the range [sprod, eprod) to become valid */
static void arm_smmu_cmdq_poll_valid_map(struct arm_smmu_cmdq *cmdq,
					 u32 sprod, u32 eprod)
{
	__arm_smmu_cmdq_poll_set_valid_map(cmdq, sprod, eprod, false);
}
/* Wait for the command queue to become non-full */
static int arm_smmu_cmdq_poll_until_not_full(struct arm_smmu_device *smmu,
					     struct arm_smmu_cmdq *cmdq,
					     struct arm_smmu_ll_queue *llq)
{
	unsigned long flags;
	struct arm_smmu_queue_poll qp;
	int ret = 0;

	/*
	 * Try to update our copy of cons by grabbing exclusive cmdq access. If
	 * that fails, spin until somebody else updates it for us.
	 */
	if (arm_smmu_cmdq_exclusive_trylock_irqsave(cmdq, flags)) {
		WRITE_ONCE(cmdq->q.llq.cons, readl_relaxed(cmdq->q.cons_reg));
		arm_smmu_cmdq_exclusive_unlock_irqrestore(cmdq, flags);
		llq->val = READ_ONCE(cmdq->q.llq.val);
		return 0;
	}

	queue_poll_init(smmu, &qp);
	do {
		llq->val = READ_ONCE(cmdq->q.llq.val);
		if (!queue_full(llq))
			break;

		ret = queue_poll(&qp);
	} while (!ret);

	return ret;
}
/*
 * Wait until the SMMU signals a CMD_SYNC completion MSI.
 * Must be called with the cmdq lock held in some capacity.
 */
static int __arm_smmu_cmdq_poll_until_msi(struct arm_smmu_device *smmu,
					  struct arm_smmu_cmdq *cmdq,
					  struct arm_smmu_ll_queue *llq)
{
	int ret = 0;
	struct arm_smmu_queue_poll qp;
	u32 *cmd = (u32 *)(Q_ENT(&cmdq->q, llq->prod));

	queue_poll_init(smmu, &qp);

	/*
	 * The MSI won't generate an event, since it's being written back
	 * into the command queue.
	 */
	qp.wfe = false;
	smp_cond_load_relaxed(cmd, !VAL || (ret = queue_poll(&qp)));
	llq->cons = ret ? llq->prod : queue_inc_prod_n(llq, 1);
	return ret;
}
/* * Wait until the SMMU cons index passes llq->prod. * Must be called with the cmdq lock held in some capacity.
*/ staticint __arm_smmu_cmdq_poll_until_consumed(struct arm_smmu_device *smmu, struct arm_smmu_cmdq *cmdq, struct arm_smmu_ll_queue *llq)
{ struct arm_smmu_queue_poll qp;
u32 prod = llq->prod; int ret = 0;
queue_poll_init(smmu, &qp);
llq->val = READ_ONCE(cmdq->q.llq.val); do { if (queue_consumed(llq, prod)) break;
ret = queue_poll(&qp);
/* * This needs to be a readl() so that our subsequent call * to arm_smmu_cmdq_shared_tryunlock() can fail accurately. * * Specifically, we need to ensure that we observe all * shared_lock()s by other CMD_SYNCs that share our owner, * so that a failing call to tryunlock() means that we're * the last one out and therefore we can safely advance * cmdq->q.llq.cons. Roughly speaking: * * CPU 0 CPU1 CPU2 (us) * * if (sync) * shared_lock(); * * dma_wmb(); * set_valid_map(); * * if (owner) { * poll_valid_map(); * <control dependency> * writel(prod_reg); * * readl(cons_reg); * tryunlock(); * * Requires us to see CPU 0's shared_lock() acquisition.
*/
llq->cons = readl(cmdq->q.cons_reg);
} while (!ret);
/* * This is the actual insertion function, and provides the following * ordering guarantees to callers: * * - There is a dma_wmb() before publishing any commands to the queue. * This can be relied upon to order prior writes to data structures * in memory (such as a CD or an STE) before the command. * * - On completion of a CMD_SYNC, there is a control dependency. * This can be relied upon to order subsequent writes to memory (e.g. * freeing an IOVA) after completion of the CMD_SYNC. * * - Command insertion is totally ordered, so if two CPUs each race to * insert their own list of commands then all of the commands from one * CPU will appear before any of the commands from the other CPU.
*/ int arm_smmu_cmdq_issue_cmdlist(struct arm_smmu_device *smmu, struct arm_smmu_cmdq *cmdq, u64 *cmds, int n, bool sync)
{
u64 cmd_sync[CMDQ_ENT_DWORDS];
u32 prod; unsignedlong flags; bool owner; struct arm_smmu_ll_queue llq, head; int ret = 0;
llq.max_n_shift = cmdq->q.llq.max_n_shift;
/* 1. Allocate some space in the queue */
local_irq_save(flags);
llq.val = READ_ONCE(cmdq->q.llq.val); do {
u64 old;
while (!queue_has_space(&llq, n + sync)) {
local_irq_restore(flags); if (arm_smmu_cmdq_poll_until_not_full(smmu, cmdq, &llq))
dev_err_ratelimited(smmu->dev, "CMDQ timeout\n");
local_irq_save(flags);
}
/* * 2. Write our commands into the queue * Dependency ordering from the cmpxchg() loop above.
*/
arm_smmu_cmdq_write_entries(cmdq, cmds, llq.prod, n); if (sync) {
prod = queue_inc_prod_n(&llq, n);
arm_smmu_cmdq_build_sync_cmd(cmd_sync, smmu, cmdq, prod);
queue_write(Q_ENT(&cmdq->q, prod), cmd_sync, CMDQ_ENT_DWORDS);
/* * In order to determine completion of our CMD_SYNC, we must * ensure that the queue can't wrap twice without us noticing. * We achieve that by taking the cmdq lock as shared before * marking our slot as valid.
*/
arm_smmu_cmdq_shared_lock(cmdq);
}
/* 3. Mark our slots as valid, ensuring commands are visible first */
dma_wmb();
arm_smmu_cmdq_set_valid_map(cmdq, llq.prod, head.prod);
/* 4. If we are the owner, take control of the SMMU hardware */ if (owner) { /* a. Wait for previous owner to finish */
atomic_cond_read_relaxed(&cmdq->owner_prod, VAL == llq.prod);
/* b. Stop gathering work by clearing the owned flag */
prod = atomic_fetch_andnot_relaxed(CMDQ_PROD_OWNED_FLAG,
&cmdq->q.llq.atomic.prod);
prod &= ~CMDQ_PROD_OWNED_FLAG;
/* * c. Wait for any gathered work to be written to the queue. * Note that we read our own entries so that we have the control * dependency required by (d).
*/
arm_smmu_cmdq_poll_valid_map(cmdq, llq.prod, prod);
/* * d. Advance the hardware prod pointer * Control dependency ordering from the entries becoming valid.
*/
writel_relaxed(prod, cmdq->q.prod_reg);
/* * e. Tell the next owner we're done * Make sure we've updated the hardware first, so that we don't * race to update prod and potentially move it backwards.
*/
atomic_set_release(&cmdq->owner_prod, prod);
}
/* 5. If we are inserting a CMD_SYNC, we must wait for it to complete */ if (sync) {
llq.prod = queue_inc_prod_n(&llq, n);
ret = arm_smmu_cmdq_poll_until_sync(smmu, cmdq, &llq); if (ret) {
dev_err_ratelimited(smmu->dev, "CMD_SYNC timeout at 0x%08x [hwprod 0x%08x, hwcons 0x%08x]\n",
llq.prod,
readl_relaxed(cmdq->q.prod_reg),
readl_relaxed(cmdq->q.cons_reg));
}
/* * Try to unlock the cmdq lock. This will fail if we're the last * reader, in which case we can safely update cmdq->q.llq.cons
*/ if (!arm_smmu_cmdq_shared_tryunlock(cmdq)) {
WRITE_ONCE(cmdq->q.llq.cons, llq.cons);
arm_smmu_cmdq_shared_unlock(cmdq);
}
}
cmd.opcode = CMDQ_OP_RESUME;
cmd.resume.sid = sid;
cmd.resume.stag = resp->grpid; switch (resp->code) { case IOMMU_PAGE_RESP_INVALID: case IOMMU_PAGE_RESP_FAILURE:
cmd.resume.resp = CMDQ_RESUME_0_RESP_ABORT; break; case IOMMU_PAGE_RESP_SUCCESS:
cmd.resume.resp = CMDQ_RESUME_0_RESP_RETRY; break; default: break;
}
arm_smmu_cmdq_issue_cmd(master->smmu, &cmd); /* * Don't send a SYNC, it doesn't do anything for RESUME or PRI_RESP. * RESUME consumption guarantees that the stalled transaction will be * terminated... at some point in the future. PRI_RESP is fire and * forget.
*/
}
/* * Based on the value of ent report which bits of the STE the HW will access. It * would be nice if this was complete according to the spec, but minimally it * has to capture the bits this driver uses.
*/
VISIBLE_IF_KUNIT void arm_smmu_get_ste_used(const __le64 *ent, __le64 *used_bits)
{ unsignedint cfg = FIELD_GET(STRTAB_STE_0_CFG, le64_to_cpu(ent[0]));
used_bits[0] = cpu_to_le64(STRTAB_STE_0_V); if (!(ent[0] & cpu_to_le64(STRTAB_STE_0_V))) return;
/* * See 13.5 Summary of attribute/permission configuration fields * for the SHCFG behavior.
*/ if (FIELD_GET(STRTAB_STE_1_S1DSS, le64_to_cpu(ent[1])) ==
STRTAB_STE_1_S1DSS_BYPASS)
used_bits[1] |= cpu_to_le64(STRTAB_STE_1_SHCFG);
}
if (cfg == STRTAB_STE_0_CFG_BYPASS)
used_bits[1] |= cpu_to_le64(STRTAB_STE_1_SHCFG);
}
EXPORT_SYMBOL_IF_KUNIT(arm_smmu_get_ste_used);
/*
 * Figure out if we can do a hitless update of entry to become target. Returns a
 * bit mask where 1 indicates that qword needs to be set disruptively.
 * unused_update is an intermediate value of entry that has unused bits set to
 * their new values.
 */
static u8 arm_smmu_entry_qword_diff(struct arm_smmu_entry_writer *writer,
				    const __le64 *entry, const __le64 *target,
				    __le64 *unused_update)
{
	__le64 target_used[NUM_ENTRY_QWORDS] = {};
	__le64 cur_used[NUM_ENTRY_QWORDS] = {};
	u8 used_qword_diff = 0;
	unsigned int i;

	/*
	 * Ask the writer which bits the HW will actually inspect for each
	 * value. Without populating these masks the comparison below would
	 * run against all-zero masks and report no used-bit differences.
	 */
	writer->ops->get_used(entry, cur_used);
	writer->ops->get_used(target, target_used);

	for (i = 0; i != NUM_ENTRY_QWORDS; i++) {
		/*
		 * Check that masks are up to date, the make functions are not
		 * allowed to set a bit to 1 if the used function doesn't say it
		 * is used.
		 */
		WARN_ON_ONCE(target[i] & ~target_used[i]);

		/* Bits can change because they are not currently being used */
		unused_update[i] = (entry[i] & cur_used[i]) |
				   (target[i] & ~cur_used[i]);
		/*
		 * Each bit indicates that a used bit in a qword needs to be
		 * changed after unused_update is applied.
		 */
		if ((unused_update[i] & target_used[i]) != target[i])
			used_qword_diff |= 1 << i;
	}
	return used_qword_diff;
}
for (i = start; len != 0; len--, i++) { if (entry[i] != target[i]) {
WRITE_ONCE(entry[i], target[i]);
changed = true;
}
}
if (changed)
writer->ops->sync(writer); return changed;
}
/*
 * Update the STE/CD to the target configuration. The transition from the
 * current entry to the target entry takes place over multiple steps that
 * attempts to make the transition hitless if possible. This function takes care
 * not to create a situation where the HW can perceive a corrupted entry. HW is
 * only required to have a 64 bit atomicity with stores from the CPU, while
 * entries are many 64 bit values big.
 *
 * The difference between the current value and the target value is analyzed to
 * determine which of three updates are required - disruptive, hitless or no
 * change.
 *
 * In the most general disruptive case we can make any update in three steps:
 *  - Disrupting the entry (V=0)
 *  - Fill now unused qwords, except qword 0 which contains V
 *  - Make qword 0 have the final value and valid (V=1) with a single 64
 *    bit store
 *
 * However this disrupts the HW while it is happening. There are several
 * interesting cases where a STE/CD can be updated without disturbing the HW
 * because only a small number of bits are changing (S1DSS, CONFIG, etc) or
 * because the used bits don't intersect. We can detect this by calculating how
 * many 64 bit values need update after adjusting the unused bits and skip the
 * V=0 process. This relies on the IGNORED behavior described in the
 * specification.
 */
VISIBLE_IF_KUNIT
void arm_smmu_write_entry(struct arm_smmu_entry_writer *writer, __le64 *entry,
			  const __le64 *target)
{
	__le64 unused_update[NUM_ENTRY_QWORDS];
	u8 used_qword_diff;

	used_qword_diff =
		arm_smmu_entry_qword_diff(writer, entry, target, unused_update);
	if (hweight8(used_qword_diff) == 1) {
		/*
		 * Only one qword needs its used bits to be changed. This is a
		 * hitless update, update all bits the current STE/CD is
		 * ignoring to their new values, then update a single "critical
		 * qword" to change the STE/CD and finally 0 out any bits that
		 * are now unused in the target configuration.
		 */
		unsigned int critical_qword_index = ffs(used_qword_diff) - 1;

		/*
		 * Skip writing unused bits in the critical qword since we'll be
		 * writing it in the next step anyways. This can save a sync
		 * when the only change is in that qword.
		 */
		unused_update[critical_qword_index] =
			entry[critical_qword_index];
		entry_set(writer, entry, unused_update, 0, NUM_ENTRY_QWORDS);
		entry_set(writer, entry, target, critical_qword_index, 1);
		entry_set(writer, entry, target, 0, NUM_ENTRY_QWORDS);
	} else if (used_qword_diff) {
		/*
		 * At least two qwords need their inuse bits to be changed. This
		 * requires a breaking update, zero the V bit, write all qwords
		 * but 0, then set qword 0
		 */
		unused_update[0] = 0;
		entry_set(writer, entry, unused_update, 0, 1);
		entry_set(writer, entry, target, 1, NUM_ENTRY_QWORDS - 1);
		entry_set(writer, entry, target, 0, 1);
	} else {
		/*
		 * No inuse bit changed. Sanity check that all unused bits are 0
		 * in the entry. The target was already sanity checked by
		 * compute_qword_diff().
		 */
		WARN_ON_ONCE(
			entry_set(writer, entry, target, 0, NUM_ENTRY_QWORDS));
	}
}
EXPORT_SYMBOL_IF_KUNIT(arm_smmu_write_entry);
/* * If EPD0 is set by the make function it means * T0SZ/TG0/IR0/OR0/SH0/TTB0 are IGNORED
*/ if (ent[0] & cpu_to_le64(CTXDESC_CD_0_TCR_EPD0)) {
used_bits[0] &= ~cpu_to_le64(
CTXDESC_CD_0_TCR_T0SZ | CTXDESC_CD_0_TCR_TG0 |
CTXDESC_CD_0_TCR_IRGN0 | CTXDESC_CD_0_TCR_ORGN0 |
CTXDESC_CD_0_TCR_SH0);
used_bits[1] &= ~cpu_to_le64(CTXDESC_CD_1_TTB0_MASK);
}
}
EXPORT_SYMBOL_IF_KUNIT(arm_smmu_get_cd_used);
/* To enable dirty flag update, set both Access flag and dirty state update */ if (pgtbl_cfg->quirks & IO_PGTABLE_QUIRK_ARM_HD)
target->data[0] |= cpu_to_le64(CTXDESC_CD_0_TCR_HA |
CTXDESC_CD_0_TCR_HD);
if (smmu->features & ARM_SMMU_FEAT_E2H) { /* * To support BTM the streamworld needs to match the * configuration of the CPU so that the ASID broadcasts are * properly matched. This means either S/NS-EL2-E2H (hypervisor) * or NS-EL1 (guest). Since an SVA domain can be installed in a * PASID this should always use a BTM compatible configuration * if the HW supports it.
*/
target->data[1] |= cpu_to_le64(
FIELD_PREP(STRTAB_STE_1_STRW, STRTAB_STE_1_STRW_EL2));
} else {
target->data[1] |= cpu_to_le64(
FIELD_PREP(STRTAB_STE_1_STRW, STRTAB_STE_1_STRW_NSEL1));
/* * VMID 0 is reserved for stage-2 bypass EL1 STEs, see * arm_smmu_domain_alloc_id()
*/
target->data[2] =
cpu_to_le64(FIELD_PREP(STRTAB_STE_2_S2VMID, 0));
}
}
EXPORT_SYMBOL_IF_KUNIT(arm_smmu_make_cdtable_ste);
/* * This can safely directly manipulate the STE memory without a sync sequence * because the STE table has not been installed in the SMMU yet.
*/ staticvoid arm_smmu_init_initial_stes(struct arm_smmu_ste *strtab, unsignedint nent)
{ unsignedint i;
for (i = 0; i < nent; ++i) {
arm_smmu_make_abort_ste(strtab);
strtab++;
}
}
switch (event->id) { case EVT_ID_BAD_STE_CONFIG: case EVT_ID_STREAM_DISABLED_FAULT: case EVT_ID_BAD_SUBSTREAMID_CONFIG: case EVT_ID_BAD_CD_CONFIG: case EVT_ID_TRANSLATION_FAULT: case EVT_ID_ADDR_SIZE_FAULT: case EVT_ID_ACCESS_FAULT: case EVT_ID_PERMISSION_FAULT: break; default: return -EOPNOTSUPP;
}
if (event->stall) { if (event->read)
perm |= IOMMU_FAULT_PERM_READ; else
perm |= IOMMU_FAULT_PERM_WRITE;
if (event->instruction)
perm |= IOMMU_FAULT_PERM_EXEC;
if (event->privileged)
perm |= IOMMU_FAULT_PERM_PRIV;
mutex_lock(&smmu->streams_mutex);
master = arm_smmu_find_master(smmu, event->sid); if (!master) {
ret = -EINVAL; goto out_unlock;
}
if (event->stall)
ret = iommu_report_device_fault(master->dev, &fault_evt); elseif (master->vmaster && !event->s2)
ret = arm_vmaster_report_event(master->vmaster, evt); else
ret = -EOPNOTSUPP; /* Unhandled events should be pinned */
out_unlock:
mutex_unlock(&smmu->streams_mutex); return ret;
}
do { while (!queue_remove_raw(q, evt)) {
arm_smmu_decode_event(smmu, evt, &event); if (arm_smmu_handle_event(smmu, evt, &event))
arm_smmu_dump_event(smmu, evt, &event, &rs);
put_device(event.dev);
cond_resched();
}
/* * Not much we can do on overflow, so scream and pretend we're * trying harder.
*/ if (queue_sync_prod_in(q) == -EOVERFLOW)
dev_err(smmu->dev, "EVTQ overflow detected -- events lost\n");
} while (!queue_empty(llq));
/* Sync our overflow flag, as we believe we're up to speed */
queue_sync_cons_ovf(q); return IRQ_HANDLED;
}
/* * ATS and PASID: * * If substream_valid is clear, the PCIe TLP is sent without a PASID * prefix. In that case all ATC entries within the address range are * invalidated, including those that were requested with a PASID! There * is no way to invalidate only entries without PASID. * * When using STRTAB_STE_1_S1DSS_SSID0 (reserving CD 0 for non-PASID * traffic), translation requests without PASID create ATC entries * without PASID, which must be invalidated with substream_valid clear. * This has the unpleasant side-effect of invalidating all PASID-tagged * ATC entries within the address range.
*/
*cmd = (struct arm_smmu_cmdq_ent) {
.opcode = CMDQ_OP_ATC_INV,
.substream_valid = (ssid != IOMMU_NO_PASID),
.atc.ssid = ssid,
};
if (!size) {
cmd->atc.size = ATC_INV_SIZE_ALL; return;
}
/* * In an ATS Invalidate Request, the address must be aligned on the * range size, which must be a power of two number of page sizes. We * thus have to choose between grossly over-invalidating the region, or * splitting the invalidation into multiple commands. For simplicity * we'll go with the first solution, but should refine it in the future * if multiple commands are shown to be more efficient. * * Find the smallest power of two that covers the range. The most * significant differing bit between the start and end addresses, * fls(start ^ end), indicates the required span. For example: * * We want to invalidate pages [8; 11]. This is already the ideal range: * x = 0b1000 ^ 0b1011 = 0b11 * span = 1 << fls(x) = 4 * * To invalidate pages [7; 10], we need to invalidate [0; 15]: * x = 0b0111 ^ 0b1010 = 0b1101 * span = 1 << fls(x) = 16
*/
log2_span = fls_long(page_start ^ page_end);
span_mask = (1ULL << log2_span) - 1;
if (!(smmu_domain->smmu->features & ARM_SMMU_FEAT_ATS)) return 0;
/* * Ensure that we've completed prior invalidation of the main TLBs * before we read 'nr_ats_masters' in case of a concurrent call to * arm_smmu_enable_ats(): * * // unmap() // arm_smmu_enable_ats() * TLBI+SYNC atomic_inc(&nr_ats_masters); * smp_mb(); [...] * atomic_read(&nr_ats_masters); pci_enable_ats() // writel() * * Ensures that we always see the incremented 'nr_ats_masters' count if * ATS was enabled at the PCI device before completion of the TLBI.
*/
smp_mb(); if (!atomic_read(&smmu_domain->nr_ats_masters)) return 0;
if (master_domain->nested_ats_flush) { /* * If a S2 used as a nesting parent is changed we have * no option but to completely flush the ATC.
*/
arm_smmu_atc_inv_to_cmd(IOMMU_NO_PASID, 0, 0, &cmd);
} else {
arm_smmu_atc_inv_to_cmd(master_domain->ssid, iova, size,
&cmd);
}
for (i = 0; i < master->num_streams; i++) {
cmd.atc.sid = master->streams[i].id;
arm_smmu_cmdq_batch_add(smmu_domain->smmu, &cmds, &cmd);
}
}
spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
/* * NOTE: when io-pgtable is in non-strict mode, we may get here with * PTEs previously cleared by unmaps on the current CPU not yet visible * to the SMMU. We are relying on the dma_wmb() implicit during cmd * insertion to guarantee those are observed before the TLBI. Do be * careful, 007.
*/ if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
arm_smmu_tlb_inv_asid(smmu, smmu_domain->cd.asid);
} else {
cmd.opcode = CMDQ_OP_TLBI_S12_VMALL;
cmd.tlbi.vmid = smmu_domain->s2_cfg.vmid;
arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
}
arm_smmu_atc_inv_domain(smmu_domain, 0, 0);
}
if (smmu->features & ARM_SMMU_FEAT_RANGE_INV) { /* Get the leaf page size */
tg = __ffs(smmu_domain->domain.pgsize_bitmap);
num_pages = size >> tg;
/* Convert page size of 12,14,16 (log2) to 1,2,3 */
cmd->tlbi.tg = (tg - 10) / 2;
/* * Determine what level the granule is at. For non-leaf, both * io-pgtable and SVA pass a nominal last-level granule because * they don't know what level(s) actually apply, so ignore that * and leave TTL=0. However for various errata reasons we still * want to use a range command, so avoid the SVA corner case * where both scale and num could be 0 as well.
*/ if (cmd->tlbi.leaf)
cmd->tlbi.ttl = 4 - ((ilog2(granule) - 3) / (tg - 3)); elseif ((num_pages & CMDQ_TLBI_RANGE_NUM_MAX) == 1)
num_pages++;
}
arm_smmu_cmdq_batch_init(smmu, &cmds, cmd);
while (iova < end) { if (smmu->features & ARM_SMMU_FEAT_RANGE_INV) { /* * On each iteration of the loop, the range is 5 bits * worth of the aligned size remaining. * The range in pages is: * * range = (num_pages & (0x1f << __ffs(num_pages)))
*/ unsignedlong scale, num;
/* Determine the power of 2 multiple number of pages */
scale = __ffs(num_pages);
cmd->tlbi.scale = scale;
/* Determine how many chunks of 2^scale size we have */
num = (num_pages >> scale) & CMDQ_TLBI_RANGE_NUM_MAX;
cmd->tlbi.num = num - 1;
/* range is num * 2^scale * pgsize */
inv_range = num << (scale + tg);
/* Clear out the lower order bits for the next iteration */
num_pages -= num << scale;
}
if (smmu_domain->nest_parent) { /* * When the S2 domain changes all the nested S1 ASIDs have to be * flushed too.
*/
cmd.opcode = CMDQ_OP_TLBI_NH_ALL;
arm_smmu_cmdq_issue_cmd_with_sync(smmu_domain->smmu, &cmd);
}
/* * Unfortunately, this can't be leaf-only since we may have * zapped an entire table.
*/
arm_smmu_atc_inv_domain(smmu_domain, iova, size);
}
/* Free the ASID or VMID */ if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) { /* Prevent SVA from touching the CD while we're freeing it */
mutex_lock(&arm_smmu_asid_lock);
xa_erase(&arm_smmu_asid_xa, smmu_domain->cd.asid);
mutex_unlock(&arm_smmu_asid_lock);
} else { struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg; if (cfg->vmid)
ida_free(&smmu->vmid_map, cfg->vmid);
}
/* Prevent SVA from modifying the ASID until it is written to the CD */
mutex_lock(&arm_smmu_asid_lock);
ret = xa_alloc(&arm_smmu_asid_xa, &asid, smmu_domain,
XA_LIMIT(1, (1 << smmu->asid_bits) - 1), GFP_KERNEL);
cd->asid = (u16)asid;
mutex_unlock(&arm_smmu_asid_lock); return ret;
}
/* Smallest Translation Unit: log2 of the smallest supported granule */
stu = __ffs(smmu->pgsize_bitmap);
pdev = to_pci_dev(master->dev);
/* * ATC invalidation of PASID 0 causes the entire ATC to be flushed.
*/
arm_smmu_atc_inv_master(master, IOMMU_NO_PASID); if (pci_enable_ats(pdev, stu))
dev_err(master->dev, "Failed to enable ATS (STU %zu)\n", stu);
}
staticint arm_smmu_enable_pasid(struct arm_smmu_master *master)
{ int ret; int features; int num_pasids; struct pci_dev *pdev;
if (!dev_is_pci(master->dev)) return -ENODEV;
pdev = to_pci_dev(master->dev);
features = pci_pasid_features(pdev); if (features < 0) return features;
num_pasids = pci_max_pasids(pdev); if (num_pasids <= 0) return num_pasids;
ret = pci_enable_pasid(pdev, features); if (ret) {
dev_err(&pdev->dev, "Failed to enable PASID\n"); return ret;
}
/* * If the domain uses the smmu_domain->devices list return the arm_smmu_domain * structure, otherwise NULL. These domains track attached devices so they can * issue invalidations.
*/ staticstruct arm_smmu_domain *
to_smmu_domain_devices(struct iommu_domain *domain)
{ /* The domain can be NULL only when processing the first attach */ if (!domain) return NULL; if ((domain->type & __IOMMU_DOMAIN_PAGING) ||
domain->type == IOMMU_DOMAIN_SVA) return to_smmu_domain(domain); if (domain->type == IOMMU_DOMAIN_NESTED) return to_smmu_nested_domain(domain)->vsmmu->s2_parent; return NULL;
}
/*
 * Enable I/O page fault handling for a master, reference counted across
 * attached domains. Returns 0 on success or when IOPF is simply not needed.
 */
static int arm_smmu_enable_iopf(struct arm_smmu_master *master,
				struct arm_smmu_master_domain *master_domain)
{
	int ret;

	iommu_group_mutex_assert(master->dev);

	if (!IS_ENABLED(CONFIG_ARM_SMMU_V3_SVA))
		return -EOPNOTSUPP;

	/*
	 * Drivers for devices supporting PRI or stall require iopf others have
	 * device-specific fault handlers and don't need IOPF, so this is not a
	 * failure.
	 */
	if (!master->stall_enabled)
		return 0;

	/* We're not keeping track of SIDs in fault events */
	if (master->num_streams != 1)
		return -EOPNOTSUPP;

	if (master->iopf_refcount) {
		master->iopf_refcount++;
		master_domain->using_iopf = true;
		return 0;
	}

	ret = iopf_queue_add_device(master->smmu->evtq.iopf, master->dev);
	if (ret)
		return ret;
	master->iopf_refcount = 1;
	master_domain->using_iopf = true;
	return 0;
}
/* * Start the sequence to attach a domain to a master. The sequence contains three * steps: * arm_smmu_attach_prepare() * arm_smmu_install_ste_for_dev() * arm_smmu_attach_commit() * * If prepare succeeds then the sequence must be completed. The STE installed * must set the STE.EATS field according to state.ats_enabled. * * If the device supports ATS then this determines if EATS should be enabled * in the STE, and starts sequencing EATS disable if required. * * The change of the EATS in the STE and the PCI ATS config space is managed by * this sequence to be in the right order so that if PCI ATS is enabled then * STE.ETAS is enabled. * * new_domain can be a non-paging domain. In this case ATS will not be enabled, * and invalidations won't be tracked.
*/ int arm_smmu_attach_prepare(struct arm_smmu_attach_state *state, struct iommu_domain *new_domain)
{ struct arm_smmu_master *master = state->master; struct arm_smmu_master_domain *master_domain; struct arm_smmu_domain *smmu_domain =
to_smmu_domain_devices(new_domain); unsignedlong flags; int ret;
/* * arm_smmu_share_asid() must not see two domains pointing to the same * arm_smmu_master_domain contents otherwise it could randomly write one * or the other to the CD.
*/
lockdep_assert_held(&arm_smmu_asid_lock);
if (smmu_domain || state->cd_needs_ats) { /* * The SMMU does not support enabling ATS with bypass/abort. * When the STE is in bypass (STE.Config[2:0] == 0b100), ATS * Translation Requests and Translated transactions are denied * as though ATS is disabled for the stream (STE.EATS == 0b00), * causing F_BAD_ATS_TREQ and F_TRANSL_FORBIDDEN events * (IHI0070Ea 5.2 Stream Table Entry). * * However, if we have installed a CD table and are using S1DSS * then ATS will work in S1DSS bypass. See "13.6.4 Full ATS * skipping stage 1". * * Disable ATS if we are going to create a normal 0b100 bypass * STE.
*/
state->ats_enabled = !state->disable_ats &&
arm_smmu_ats_supported(master);
}
if (smmu_domain) { if (new_domain->type == IOMMU_DOMAIN_NESTED) {
ret = arm_smmu_attach_prepare_vmaster(
state, to_smmu_nested_domain(new_domain)); if (ret) return ret;
}
master_domain = kzalloc(sizeof(*master_domain), GFP_KERNEL); if (!master_domain) {
ret = -ENOMEM; goto err_free_vmaster;
}
master_domain->domain = new_domain;
master_domain->master = master;
master_domain->ssid = state->ssid; if (new_domain->type == IOMMU_DOMAIN_NESTED)
master_domain->nested_ats_flush =
to_smmu_nested_domain(new_domain)->enable_ats;
if (new_domain->iopf_handler) {
ret = arm_smmu_enable_iopf(master, master_domain); if (ret) goto err_free_master_domain;
}
/* * During prepare we want the current smmu_domain and new * smmu_domain to be in the devices list before we change any * HW. This ensures that both domains will send ATS * invalidations to the master until we are done. * * It is tempting to make this list only track masters that are * using ATS, but arm_smmu_share_asid() also uses this to change * the ASID of a domain, unrelated to ATS. * * Notice if we are re-attaching the same domain then the list * will have two identical entries and commit will remove only * one of them.
*/
spin_lock_irqsave(&smmu_domain->devices_lock, flags); if (smmu_domain->enforce_cache_coherency &&
!arm_smmu_master_canwbs(master)) {
spin_unlock_irqrestore(&smmu_domain->devices_lock,
flags);
ret = -EINVAL; goto err_iopf;
}
if (state->ats_enabled)
atomic_inc(&smmu_domain->nr_ats_masters);
list_add(&master_domain->devices_elm, &smmu_domain->devices);
spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
}
if (!state->ats_enabled && master->ats_enabled) {
pci_disable_ats(to_pci_dev(master->dev)); /* * This is probably overkill, but the config write for disabling * ATS should complete before the STE is configured to generate * UR to avoid AER noise.
*/
wmb();
} return 0;
/* * Commit is done after the STE/CD are configured with the EATS setting. It * completes synchronizing the PCI device's ATC and finishes manipulating the * smmu_domain->devices list.
*/ void arm_smmu_attach_commit(struct arm_smmu_attach_state *state)
{ struct arm_smmu_master *master = state->master;
lockdep_assert_held(&arm_smmu_asid_lock);
arm_smmu_attach_commit_vmaster(state);
if (state->ats_enabled && !master->ats_enabled) {
arm_smmu_enable_ats(master);
} elseif (state->ats_enabled && master->ats_enabled) { /* * The translation has changed, flush the ATC. At this point the * SMMU is translating for the new domain and both the old&new * domain will issue invalidations.
*/
arm_smmu_atc_inv_master(master, state->ssid);
} elseif (!state->ats_enabled && master->ats_enabled) { /* ATS is being switched off, invalidate the entire ATC */
arm_smmu_atc_inv_master(master, IOMMU_NO_PASID);
}
if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
cdptr = arm_smmu_alloc_cd_ptr(master, IOMMU_NO_PASID); if (!cdptr) return -ENOMEM;
} elseif (arm_smmu_ssids_in_use(&master->cd_table)) return -EBUSY;
/* * Prevent arm_smmu_share_asid() from trying to change the ASID * of either the old or new domain while we are working on it. * This allows the STE and the smmu_domain->devices list to * be inconsistent during this routine.
*/
mutex_lock(&arm_smmu_asid_lock);
ret = arm_smmu_attach_prepare(&state, domain); if (ret) {
mutex_unlock(&arm_smmu_asid_lock); return ret;
}
switch (smmu_domain->stage) { case ARM_SMMU_DOMAIN_S1: { struct arm_smmu_cd target_cd;
if (smmu_domain->stage != ARM_SMMU_DOMAIN_S1) return -EINVAL;
/* * We can read cd.asid outside the lock because arm_smmu_set_pasid() * will fix it
*/
arm_smmu_make_s1_cd(&target_cd, master, smmu_domain); return arm_smmu_set_pasid(master, to_smmu_domain(domain), id,
&target_cd, old);
}
/* * Change the STE into a cdtable one with SID IDENTITY/BLOCKED behavior * using s1dss if necessary. If the cd_table is already installed then * the S1DSS is correct and this will just update the EATS. Otherwise it * installs the entire thing. This will be hitless.
*/
arm_smmu_make_cdtable_ste(&ste, master, ats_enabled, s1dss);
arm_smmu_install_ste_for_dev(master, &ste);
}
cdptr = arm_smmu_alloc_cd_ptr(master, pasid); if (!cdptr) return -ENOMEM;
mutex_lock(&arm_smmu_asid_lock);
ret = arm_smmu_attach_prepare(&state, &smmu_domain->domain); if (ret) goto out_unlock;
/* * We don't want to obtain to the asid_lock too early, so fix up the * caller set ASID under the lock in case it changed.
*/
cd->data[0] &= ~cpu_to_le64(CTXDESC_CD_0_ASID);
cd->data[0] |= cpu_to_le64(
FIELD_PREP(CTXDESC_CD_0_ASID, smmu_domain->cd.asid));
/* * When the last user of the CD table goes away downgrade the STE back * to a non-cd_table one.
*/ if (!arm_smmu_ssids_in_use(&master->cd_table)) { struct iommu_domain *sid_domain =
iommu_get_domain_for_dev(master->dev);
/* * Do not allow any ASID to be changed while are working on the STE, * otherwise we could miss invalidations.
*/
mutex_lock(&arm_smmu_asid_lock);
/* * If the CD table is not in use we can use the provided STE, otherwise * we use a cdtable STE with the provided S1DSS.
*/ if (arm_smmu_ssids_in_use(&master->cd_table)) { /* * If a CD table has to be present then we need to run with ATS * on because we have to assume a PASID is using ATS. For * IDENTITY this will setup things so that S1DSS=bypass which * follows the explanation in "13.6.4 Full ATS skipping stage 1" * and allows for ATS on the RID to work.
*/
state.cd_needs_ats = true;
arm_smmu_attach_prepare(&state, domain);
arm_smmu_make_cdtable_ste(ste, master, state.ats_enabled, s1dss);
} else {
arm_smmu_attach_prepare(&state, domain);
}
arm_smmu_install_ste_for_dev(master, ste);
arm_smmu_attach_commit(&state);
mutex_unlock(&arm_smmu_asid_lock);
/* * This has to be done after removing the master from the * arm_smmu_domain->devices to avoid races updating the same context * descriptor from arm_smmu_share_asid().
*/
arm_smmu_clear_cd(master, IOMMU_NO_PASID);
}
/*
 * Validate @sid against this SMMU's stream ID space and, for a 2-level
 * stream table, make sure the level-2 table covering @sid exists.
 *
 * Returns 0 on success, -ERANGE if the SID is out of range, or the error
 * from arm_smmu_init_l2_strtab().
 */
static int arm_smmu_init_sid_strtab(struct arm_smmu_device *smmu, u32 sid)
{
	/* Check the SID is in range of the SMMU and our stream table */
	if (!arm_smmu_sid_in_range(smmu, sid))
		return -ERANGE;

	/* Ensure the l2 strtab covering this SID is initialised */
	if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB)
		return arm_smmu_init_l2_strtab(smmu, sid);

	/* Linear stream table: nothing further to allocate */
	return 0;
}
staticint arm_smmu_insert_master(struct arm_smmu_device *smmu, struct arm_smmu_master *master)
{ int i; int ret = 0; struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(master->dev);
mutex_lock(&smmu->streams_mutex); for (i = 0; i < fwspec->num_ids; i++)
rb_erase(&master->streams[i].node, &smmu->streams);
mutex_unlock(&smmu->streams_mutex);
/* * Note that PASID must be enabled before, and disabled after ATS: * PCI Express Base 4.0r1.0 - 10.5.1.3 ATS Control Register * * Behavior is undefined if this bit is Set and the value of the PASID * Enable, Execute Requested Enable, or Privileged Mode Requested bits * are changed.
*/
arm_smmu_enable_pasid(master);
if (!(smmu->features & ARM_SMMU_FEAT_2_LVL_CDTAB))
master->ssid_bits = min_t(u8, master->ssid_bits,
CTXDESC_LINEAR_CDMAX);
/* Put the STE back to what arm_smmu_init_strtab() sets */ if (dev->iommu->require_direct)
arm_smmu_attach_dev_identity(&arm_smmu_identity_domain, dev); else
arm_smmu_attach_dev_blocked(&arm_smmu_blocked_domain, dev);
arm_smmu_disable_pasid(master);
arm_smmu_remove_master(master); if (arm_smmu_cdtab_allocated(&master->cd_table))
arm_smmu_free_cd_tables(master);
kfree(master);
}
/* * We don't support devices sharing stream IDs other than PCI RID * aliases, since the necessary ID-to-device lookup becomes rather * impractical given a potential sparse 32-bit stream ID space.
*/ if (dev_is_pci(dev))
group = pci_device_group(dev); else
group = generic_device_group(dev);
region = iommu_alloc_resv_region(MSI_IOVA_BASE, MSI_IOVA_LENGTH,
prot, IOMMU_RESV_SW_MSI, GFP_KERNEL); if (!region) return;
list_add_tail(®ion->list, head);
iommu_dma_get_resv_regions(dev, head);
}
/* * HiSilicon PCIe tune and trace device can be used to trace TLP headers on the * PCIe link and save the data to memory by DMA. The hardware is restricted to * use identity mapping only.
*/ #define IS_HISI_PTT_DEVICE(pdev) ((pdev)->vendor == PCI_VENDOR_ID_HUAWEI && \
(pdev)->device == 0xa12e)
static int arm_smmu_init_strtab(struct arm_smmu_device *smmu)
{
int ret;
if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB)
ret = arm_smmu_init_strtab_2lvl(smmu);
else
ret = arm_smmu_init_strtab_linear(smmu);
if (ret)
return ret;
ida_init(&smmu->vmid_map);
return 0;
}
/*
 * NOTE(review): this span appears corrupted. The signature says
 * arm_smmu_init_structures(), but the body references 'nvec' and 'dev',
 * which are not declared here, and uses bare 'return' in an int-returning
 * function — it looks like the body of the MSI-setup routine was merged
 * under this signature. Verify against the upstream driver before relying
 * on anything below.
 */
static int arm_smmu_init_structures(struct arm_smmu_device *smmu)
{
int ret;
/* NOTE(review): PRI-queue MSI config write vs. 'nvec--' branch — the else
 * arm makes no sense paired with this condition; presumably from the MSI
 * vector-count logic of a different function. TODO confirm. */
if (smmu->features & ARM_SMMU_FEAT_PRI)
writeq_relaxed(0, smmu->base + ARM_SMMU_PRIQ_IRQ_CFG0);
else
nvec--;
/* Bail out when the SMMU cannot generate MSIs at all */
if (!(smmu->features & ARM_SMMU_FEAT_MSI))
return;
/* No MSI domain: fall back to wired interrupts */
if (!dev->msi.domain) {
dev_info(smmu->dev, "msi_domain absent - falling back to wired irqs\n");
return;
}
/* Allocate MSIs for evtq, gerror and priq. Ignore cmdq */
ret = platform_device_msi_init_and_alloc_irqs(dev, nvec, arm_smmu_write_msi_msg);
if (ret) {
dev_warn(dev, "failed to allocate MSIs - falling back to wired irqs\n");
return;
}
/* Add callback to free MSIs on teardown */
devm_add_action_or_reset(dev, arm_smmu_free_msis, dev);
}
/*
 * Wire up the per-queue interrupt lines (event queue, global error, and —
 * when PRI is supported — the PRI queue). MSIs are configured first; any
 * line that is absent or fails to request only produces a warning, since
 * the SMMU can limp along without it.
 */
static void arm_smmu_setup_unique_irqs(struct arm_smmu_device *smmu)
{
	int err, line;

	arm_smmu_setup_msis(smmu);

	/* Request interrupt lines */
	line = smmu->evtq.q.irq;
	if (!line) {
		dev_warn(smmu->dev, "no evtq irq - events will not be reported!\n");
	} else {
		err = devm_request_threaded_irq(smmu->dev, line, NULL,
						arm_smmu_evtq_thread,
						IRQF_ONESHOT,
						"arm-smmu-v3-evtq", smmu);
		if (err < 0)
			dev_warn(smmu->dev, "failed to enable evtq irq\n");
	}

	line = smmu->gerr_irq;
	if (!line) {
		dev_warn(smmu->dev, "no gerr irq - errors will not be reported!\n");
	} else {
		err = devm_request_irq(smmu->dev, line, arm_smmu_gerror_handler,
				       0, "arm-smmu-v3-gerror", smmu);
		if (err < 0)
			dev_warn(smmu->dev, "failed to enable gerror irq\n");
	}

	if (smmu->features & ARM_SMMU_FEAT_PRI) {
		line = smmu->priq.q.irq;
		if (!line) {
			dev_warn(smmu->dev, "no priq irq - PRI will be broken\n");
		} else {
			err = devm_request_threaded_irq(smmu->dev, line, NULL,
							arm_smmu_priq_thread,
							IRQF_ONESHOT,
							"arm-smmu-v3-priq",
							smmu);
			if (err < 0)
				dev_warn(smmu->dev,
					 "failed to enable priq irq\n");
		}
	}
}
/*
 * Top-level IRQ bring-up: disable interrupt generation, request either the
 * single combined line (ThunderX2 quirk) or the individual queue lines,
 * then re-enable interrupt generation for the lines we care about.
 *
 * Returns an error only if the initial disable fails; failure to request
 * or re-enable lines is reported with a warning and 0 is still returned.
 */
static int arm_smmu_setup_irqs(struct arm_smmu_device *smmu)
{
	u32 irq_mask = IRQ_CTRL_EVTQ_IRQEN | IRQ_CTRL_GERROR_IRQEN;
	int err, combined;

	/* Disable IRQs first */
	err = arm_smmu_write_reg_sync(smmu, 0, ARM_SMMU_IRQ_CTRL,
				      ARM_SMMU_IRQ_CTRLACK);
	if (err) {
		dev_err(smmu->dev, "failed to disable irqs\n");
		return err;
	}

	combined = smmu->combined_irq;
	if (!combined) {
		arm_smmu_setup_unique_irqs(smmu);
	} else {
		/*
		 * Cavium ThunderX2 implementation doesn't support unique irq
		 * lines. Use a single irq line for all the SMMUv3 interrupts.
		 */
		err = devm_request_threaded_irq(smmu->dev, combined,
						arm_smmu_combined_irq_handler,
						arm_smmu_combined_irq_thread,
						IRQF_ONESHOT,
						"arm-smmu-v3-combined-irq", smmu);
		if (err < 0)
			dev_warn(smmu->dev, "failed to enable combined irq\n");
	}

	if (smmu->features & ARM_SMMU_FEAT_PRI)
		irq_mask |= IRQ_CTRL_PRIQ_IRQEN;

	/* Enable interrupt generation on the SMMU */
	err = arm_smmu_write_reg_sync(smmu, irq_mask,
				      ARM_SMMU_IRQ_CTRL, ARM_SMMU_IRQ_CTRLACK);
	if (err)
		dev_warn(smmu->dev, "failed to enable irqs\n");

	return 0;
}
/*
 * NOTE(review): this span appears corrupted. The upstream
 * arm_smmu_device_disable() only clears CR0 and returns 'ret'; here the
 * lines from 'enables |= CR0_PRIQEN' onward reference an undeclared
 * 'enables' variable and look like a fragment of the device-reset path
 * spliced in, and the function falls off the end without returning a
 * value. Verify against the upstream driver.
 */
static int arm_smmu_device_disable(struct arm_smmu_device *smmu)
{
int ret;
/* Clear CR0 (and wait for ack) to turn the SMMU interface off */
ret = arm_smmu_write_reg_sync(smmu, 0, ARM_SMMU_CR0, ARM_SMMU_CR0ACK);
if (ret)
dev_err(smmu->dev, "failed to clear cr0\n");
/* NOTE(review): 'enables' is not declared in this function — the rest of
 * this body presumably belongs to the reset routine. TODO confirm. */
enables |= CR0_PRIQEN;
ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
ARM_SMMU_CR0ACK);
if (ret) {
dev_err(smmu->dev, "failed to enable PRI queue\n");
return ret;
}
}
if (smmu->features & ARM_SMMU_FEAT_ATS) {
enables |= CR0_ATSCHK;
ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
ARM_SMMU_CR0ACK);
if (ret) {
dev_err(smmu->dev, "failed to enable ATS check\n");
return ret;
}
}
ret = arm_smmu_setup_irqs(smmu);
if (ret) {
dev_err(smmu->dev, "failed to setup irqs\n");
return ret;
}
if (is_kdump_kernel())
enables &= ~(CR0_EVTQEN | CR0_PRIQEN);
/* Enable the SMMU interface */
enables |= CR0_SMMUEN;
ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
ARM_SMMU_CR0ACK);
if (ret) {
dev_err(smmu->dev, "failed to enable SMMU interface\n");
return ret;
}
if (smmu->impl_ops && smmu->impl_ops->device_reset) {
ret = smmu->impl_ops->device_reset(smmu);
if (ret) {
dev_err(smmu->dev, "failed to reset impl\n");
return ret;
}
}
if (reg & IDR0_CD2L)
smmu->features |= ARM_SMMU_FEAT_2_LVL_CDTAB;
/*
* Translation table endianness.
* We currently require the same endianness as the CPU, but this
* could be changed later by adding a new IO_PGTABLE_QUIRK.
*/
switch (FIELD_GET(IDR0_TTENDIAN, reg)) {
case IDR0_TTENDIAN_MIXED:
smmu->features |= ARM_SMMU_FEAT_TT_LE | ARM_SMMU_FEAT_TT_BE;
break;
#ifdef __BIG_ENDIAN
case IDR0_TTENDIAN_BE:
smmu->features |= ARM_SMMU_FEAT_TT_BE;
break;
#else
case IDR0_TTENDIAN_LE:
smmu->features |= ARM_SMMU_FEAT_TT_LE;
break;
#endif
default:
dev_err(smmu->dev, "unknown/unsupported TT endianness!\n");
return -ENXIO;
}
if (IS_ENABLED(CONFIG_PCI_ATS) && reg & IDR0_ATS)
smmu->features |= ARM_SMMU_FEAT_ATS;
if (reg & IDR0_SEV)
smmu->features |= ARM_SMMU_FEAT_SEV;
if (reg & IDR0_MSI) {
smmu->features |= ARM_SMMU_FEAT_MSI;
if (coherent && !disable_msipolling)
smmu->options |= ARM_SMMU_OPT_MSIPOLL;
}
if (reg & IDR0_HYP) {
smmu->features |= ARM_SMMU_FEAT_HYP;
if (cpus_have_cap(ARM64_HAS_VIRT_HOST_EXTN))
smmu->features |= ARM_SMMU_FEAT_E2H;
}
arm_smmu_get_httu(smmu, reg);
/*
* The coherency feature as set by FW is used in preference to the ID
* register, but warn on mismatch.
*/
if (!!(reg & IDR0_COHACC) != coherent)
dev_warn(smmu->dev, "IDR0.COHACC overridden by FW configuration (%s)\n",
str_true_false(coherent));
switch (FIELD_GET(IDR0_STALL_MODEL, reg)) {
case IDR0_STALL_MODEL_FORCE:
smmu->features |= ARM_SMMU_FEAT_STALL_FORCE;
fallthrough;
case IDR0_STALL_MODEL_STALL:
smmu->features |= ARM_SMMU_FEAT_STALLS;
}
if (reg & IDR0_S1P)
smmu->features |= ARM_SMMU_FEAT_TRANS_S1;
if (reg & IDR0_S2P)
smmu->features |= ARM_SMMU_FEAT_TRANS_S2;
if (!(reg & (IDR0_S1P | IDR0_S2P))) {
dev_err(smmu->dev, "no translation support!\n");
return -ENXIO;
}
/* We only support the AArch64 table format at present */
switch (FIELD_GET(IDR0_TTF, reg)) {
case IDR0_TTF_AARCH32_64:
smmu->ias = 40;
fallthrough;
case IDR0_TTF_AARCH64:
break;
default:
dev_err(smmu->dev, "AArch64 table format not supported!\n");
return -ENXIO;
}
if (reg & IDR1_ATTR_TYPES_OVR)
smmu->features |= ARM_SMMU_FEAT_ATTR_TYPES_OVR;
/* Queue sizes, capped to ensure natural alignment */
smmu->cmdq.q.llq.max_n_shift = min_t(u32, CMDQ_MAX_SZ_SHIFT,
FIELD_GET(IDR1_CMDQS, reg));
if (smmu->cmdq.q.llq.max_n_shift <= ilog2(CMDQ_BATCH_ENTRIES)) {
/*
* We don't support splitting up batches, so one batch of
* commands plus an extra sync needs to fit inside the command
* queue. There's also no way we can handle the weird alignment
* restrictions on the base pointer for a unit-length queue.
*/
dev_err(smmu->dev, "command queue size <= %d entries not supported\n",
CMDQ_BATCH_ENTRIES);
return -ENXIO;
}
/*
* If the SMMU supports fewer bits than would fill a single L2 stream
* table, use a linear table instead.
*/
if (smmu->sid_bits <= STRTAB_SPLIT)
smmu->features &= ~ARM_SMMU_FEAT_2_LVL_STRTAB;
/* IDR3 */
reg = readl_relaxed(smmu->base + ARM_SMMU_IDR3);
if (FIELD_GET(IDR3_RIL, reg))
smmu->features |= ARM_SMMU_FEAT_RANGE_INV;
if (FIELD_GET(IDR3_FWB, reg))
smmu->features |= ARM_SMMU_FEAT_S2FWB;
if (FIELD_GET(IDR3_BBM, reg) == 2)
smmu->features |= ARM_SMMU_FEAT_BBML2;
/* Output address size */
switch (FIELD_GET(IDR5_OAS, reg)) {
case IDR5_OAS_32_BIT:
smmu->oas = 32;
break;
case IDR5_OAS_36_BIT:
smmu->oas = 36;
break;
case IDR5_OAS_40_BIT:
smmu->oas = 40;
break;
case IDR5_OAS_42_BIT:
smmu->oas = 42;
break;
case IDR5_OAS_44_BIT:
smmu->oas = 44;
break;
case IDR5_OAS_52_BIT:
smmu->oas = 52;
smmu->pgsize_bitmap |= 1ULL << 42; /* 4TB */
break;
default:
dev_info(smmu->dev,
"unknown output address size. Truncating to 48-bit\n");
fallthrough;
case IDR5_OAS_48_BIT:
smmu->oas = 48;
}
/* Set the DMA mask for our table walker */
if (dma_set_mask_and_coherent(smmu->dev, DMA_BIT_MASK(smmu->oas)))
dev_warn(smmu->dev,
"failed to set DMA mask for table walker\n");
switch (iort_smmu->model) {
case ACPI_IORT_SMMU_V3_CAVIUM_CN99XX:
smmu->options |= ARM_SMMU_OPT_PAGE0_REGS_ONLY;
break;
case ACPI_IORT_SMMU_V3_HISILICON_HI161X:
smmu->options |= ARM_SMMU_OPT_SKIP_PREFETCH;
break;
case ACPI_IORT_SMMU_V3_GENERIC:
/*
* Tegra241 implementation stores its SMMU options and impl_dev
* in DSDT. Thus, go through the ACPI tables unconditionally.
*/
acpi_smmu_dsdt_probe_tegra241_cmdqv(node, smmu);
break;
}
list_for_each_entry(e, &rmr_list, list) {
struct iommu_iort_rmr_data *rmr;
int ret, i;
rmr = container_of(e, struct iommu_iort_rmr_data, rr);
for (i = 0; i < rmr->num_sids; i++) {
ret = arm_smmu_init_sid_strtab(smmu, rmr->sids[i]);
if (ret) {
dev_err(smmu->dev, "RMR SID(0x%x) bypass failed\n",
rmr->sids[i]);
continue;
}
/*
* STE table is not programmed to HW, see
* arm_smmu_initial_bypass_stes()
*/
arm_smmu_make_bypass_ste(smmu,
arm_smmu_get_step_for_sid(smmu, rmr->sids[i]));
}
}
if (smmu->impl_ops && smmu->impl_ops->device_remove)
smmu->impl_ops->device_remove(smmu);
}
/*
* Probe all the compiled in implementations. Each one checks to see if it
* matches this HW and if so returns a devm_krealloc'd arm_smmu_device which
* replaces the callers. Otherwise the original is returned or ERR_PTR.
*/
static struct arm_smmu_device *arm_smmu_impl_probe(struct arm_smmu_device *smmu)
{
struct arm_smmu_device *new_smmu = ERR_PTR(-ENODEV);
const struct arm_smmu_impl_ops *ops;
int ret;
if (smmu->impl_dev && (smmu->options & ARM_SMMU_OPT_TEGRA241_CMDQV))
new_smmu = tegra241_cmdqv_probe(smmu);
if (new_smmu == ERR_PTR(-ENODEV))
return smmu;
if (IS_ERR(new_smmu))
return new_smmu;
ops = new_smmu->impl_ops;
if (ops) {
/* get_viommu_size and vsmmu_init ops must be paired */
if (WARN_ON(!ops->get_viommu_size != !ops->vsmmu_init)) {
ret = -EINVAL;
goto err_remove;
}
}
ret = devm_add_action_or_reset(new_smmu->dev, arm_smmu_impl_remove,
new_smmu);
if (ret)
return ERR_PTR(ret);
return new_smmu;
if (dev->of_node) {
ret = arm_smmu_device_dt_probe(pdev, smmu);
} else {
ret = arm_smmu_device_acpi_probe(pdev, smmu);
}
if (ret)
return ret;
smmu = arm_smmu_impl_probe(smmu);
if (IS_ERR(smmu))
return PTR_ERR(smmu);
/* Base address */
res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
if (!res)
return -EINVAL;
if (resource_size(res) < arm_smmu_resource_size(smmu)) {
dev_err(dev, "MMIO region too small (%pr)\n", res);
return -EINVAL;
}
ioaddr = res->start;
/*
* Don't map the IMPLEMENTATION DEFINED regions, since they may contain
* the PMCG registers which are reserved by the PMU driver.
*/
smmu->base = arm_smmu_ioremap(dev, ioaddr, ARM_SMMU_REG_SZ);
if (IS_ERR(smmu->base))
return PTR_ERR(smmu->base);
MODULE_DESCRIPTION("IOMMU API for ARM architected SMMUv3 implementations");
MODULE_AUTHOR("Will Deacon <will@kernel.org>");
MODULE_ALIAS("platform:arm-smmu-v3");
MODULE_LICENSE("GPL v2");
/*
 * NOTE(review): the following trailing text is a German website disclaimer
 * ("information compiled to the best of our knowledge; no guarantee of
 * completeness, correctness or quality; syntax highlighting and measurement
 * are still experimental"). It is scraping residue, not part of the driver;
 * it is preserved here as a comment so the file remains valid C.
 *
 * Die Informationen auf dieser Webseite wurden nach bestem Wissen
 * sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch
 * Richtigkeit, noch Qualität der bereit gestellten Informationen
 * zugesichert.
 * Bemerkung: Die farbliche Syntaxdarstellung und die Messung sind noch
 * experimentell.
 */