static int nd_region_invalidate_memregion(struct nd_region *nd_region)
{
	int i, incoherent = 0;

	for (i = 0; i < nd_region->ndr_mappings; i++) {
		struct nd_mapping *nd_mapping = &nd_region->mapping[i];
		struct nvdimm *nvdimm = nd_mapping->nvdimm;

		if (test_bit(NDD_INCOHERENT, &nvdimm->flags)) {
			incoherent++;
			break;
		}
	}

	if (!incoherent)
		return 0;

	if (!cpu_cache_has_invalidate_memregion()) {
		if (IS_ENABLED(CONFIG_NVDIMM_SECURITY_TEST)) {
			dev_warn(&nd_region->dev,
				 "Bypassing cpu_cache_invalidate_memregion() for testing!\n");
			goto out;
		} else {
			dev_err(&nd_region->dev,
				"Failed to synchronize CPU cache state\n");
			return -ENXIO;
		}
	}

	cpu_cache_invalidate_memregion(IORES_DESC_PERSISTENT_MEMORY);
out:
	for (i = 0; i < nd_region->ndr_mappings; i++) {
		struct nd_mapping *nd_mapping = &nd_region->mapping[i];
		struct nvdimm *nvdimm = nd_mapping->nvdimm;

		clear_bit(NDD_INCOHERENT, &nvdimm->flags);
	}

	return 0;
}
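
/*
 * For context, a sketch of the producer side: NDD_INCOHERENT is set by the
 * security paths (e.g. after an unlock or overwrite completes) to record
 * that CPU caches may hold stale data for the DIMM's address range. The
 * exact call site lives outside this excerpt; roughly:
 *
 *	if (rc == 0)
 *		set_bit(NDD_INCOHERENT, &nvdimm->flags);
 */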
int nd_region_activate(struct nd_region *nd_region)
{
	int i, j, rc, num_flush = 0;
	struct nd_region_data *ndrd;
	struct device *dev = &nd_region->dev;
	size_t flush_data_size = sizeof(void *);

	nvdimm_bus_lock(&nd_region->dev);
	for (i = 0; i < nd_region->ndr_mappings; i++) {
		struct nd_mapping *nd_mapping = &nd_region->mapping[i];
		struct nvdimm *nvdimm = nd_mapping->nvdimm;

		if (test_bit(NDD_SECURITY_OVERWRITE, &nvdimm->flags)) {
			nvdimm_bus_unlock(&nd_region->dev);
			return -EBUSY;
		}

		/* at least one null hint slot per-dimm for the "no-hint" case */
		flush_data_size += sizeof(void *);
		num_flush = min_not_zero(num_flush, nvdimm->num_flush);
		if (!nvdimm->num_flush)
			continue;
		flush_data_size += nvdimm->num_flush * sizeof(void *);
	}
	nvdimm_bus_unlock(&nd_region->dev);

	rc = nd_region_invalidate_memregion(nd_region);
	if (rc)
		return rc;

	ndrd = devm_kzalloc(dev, sizeof(*ndrd) + flush_data_size, GFP_KERNEL);
	if (!ndrd)
		return -ENOMEM;
	dev_set_drvdata(dev, ndrd);

	if (!num_flush)
		return 0;

	ndrd->hints_shift = ilog2(num_flush);
	for (i = 0; i < nd_region->ndr_mappings; i++) {
		struct nd_mapping *nd_mapping = &nd_region->mapping[i];
		struct nvdimm *nvdimm = nd_mapping->nvdimm;
		int rc = nvdimm_map_flush(&nd_region->dev, nvdimm, i, ndrd);

		if (rc)
			return rc;
	}
	/*
	 * Clear out duplicate entries so the same flush hint address is
	 * not written more than once.
	 */
	for (i = 0; i < nd_region->ndr_mappings - 1; i++) {
		/* ignore if NULL already */
		if (!ndrd_get_flush_wpq(ndrd, i, 0))
			continue;

		for (j = i + 1; j < nd_region->ndr_mappings; j++)
			if (ndrd_get_flush_wpq(ndrd, i, 0) ==
			    ndrd_get_flush_wpq(ndrd, j, 0))
				ndrd_set_flush_wpq(ndrd, j, 0, NULL);
	}

	return 0;
}
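
/*
 * For reference, a sketch of how activation is reached. The caller lives
 * in the region driver's probe path, outside this excerpt, so the details
 * below are assumptions:
 *
 *	static int nd_region_probe(struct device *dev)
 *	{
 *		struct nd_region *nd_region = to_nd_region(dev);
 *		int err = nd_region_activate(nd_region);
 *
 *		if (err)
 *			return err;
 *		...
 *	}
 */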
/**
 * nd_region_to_nstype() - region to an integer namespace type
 * @nd_region: region-device to interrogate
 *
 * This is the 'nstype' attribute of a region as well, an input to the
 * MODALIAS for namespace devices, and bit number for a nvdimm_bus to match
 * namespace devices with namespace drivers.
 */
int nd_region_to_nstype(struct nd_region *nd_region)
{
	if (is_memory(&nd_region->dev)) {
		u16 i, label;

		for (i = 0, label = 0; i < nd_region->ndr_mappings; i++) {
			struct nd_mapping *nd_mapping = &nd_region->mapping[i];
			struct nvdimm *nvdimm = nd_mapping->nvdimm;

			if (test_bit(NDD_LABELING, &nvdimm->flags))
				label++;
		}
		if (label)
			return ND_DEVICE_NAMESPACE_PMEM;
		else
			return ND_DEVICE_NAMESPACE_IO;
	}

	return 0;
}
EXPORT_SYMBOL(nd_region_to_nstype);
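
/*
 * Example of how the nstype value surfaces (a sketch; ND_DEVICE_MODALIAS_FMT
 * is defined elsewhere as "nd:t%d"):
 *
 *	return sprintf(buf, ND_DEVICE_MODALIAS_FMT "\n",
 *			nd_region_to_nstype(nd_region));
 */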
static ssize_t set_cookie_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct nd_region *nd_region = to_nd_region(dev);
	struct nd_interleave_set *nd_set = nd_region->nd_set;
	ssize_t rc = 0;

	if (is_memory(dev) && nd_set)
		/* pass, should be precluded by region_visible() */;
	else
		return -ENXIO;

	/*
	 * The cookie to show depends on which specification of the
	 * labels we are using. If there are no labels then default to
	 * the v1.1 namespace label cookie definition. To read all this
	 * data we need to wait for probing to settle.
	 */
	device_lock(dev);
	nvdimm_bus_lock(dev);
	wait_nvdimm_bus_probe_idle(dev);
	if (nd_region->ndr_mappings) {
		struct nd_mapping *nd_mapping = &nd_region->mapping[0];
		struct nvdimm_drvdata *ndd = to_ndd(nd_mapping);

		if (ndd) {
			struct nd_namespace_index *nsindex;

			nsindex = to_namespace_index(ndd, ndd->ns_current);
			rc = sprintf(buf, "%#llx\n",
					nd_region_interleave_set_cookie(nd_region,
									nsindex));
		}
	}
	nvdimm_bus_unlock(dev);
	device_unlock(dev);

	if (rc)
		return rc;
	return sprintf(buf, "%#llx\n", nd_set->cookie);
}
resource_size_t nd_region_available_dpa(struct nd_region *nd_region)
{
	resource_size_t available = 0;
	int i;

	WARN_ON(!is_nvdimm_bus_locked(&nd_region->dev));
	for (i = 0; i < nd_region->ndr_mappings; i++) {
		struct nd_mapping *nd_mapping = &nd_region->mapping[i];
		struct nvdimm_drvdata *ndd = to_ndd(nd_mapping);

		/* if a dimm is disabled the available capacity is zero */
		if (!ndd)
			return 0;

		available += nd_pmem_available_dpa(nd_region, nd_mapping);
	}

	return available;
}
static ssize_t available_size_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct nd_region *nd_region = to_nd_region(dev);
	unsigned long long available = 0;

	/*
	 * Flush in-flight updates and grab a snapshot of the available
	 * size. Of course, this value is potentially invalidated the
	 * moment the nvdimm_bus_lock() is dropped, but that's userspace's
	 * problem to not race itself.
	 */
	device_lock(dev);
	nvdimm_bus_lock(dev);
	wait_nvdimm_bus_probe_idle(dev);
	available = nd_region_available_dpa(nd_region);
	nvdimm_bus_unlock(dev);
	device_unlock(dev);

	return sprintf(buf, "%llu\n", available);
}
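
/*
 * Usage note: this value is what userspace reads from sysfs, e.g. (path
 * shown for illustration):
 *
 *	$ cat /sys/bus/nd/devices/region0/available_size
 */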
static ssize_t align_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t len)
{
	struct nd_region *nd_region = to_nd_region(dev);
	unsigned long val, dpa;
	u32 mappings, remainder;
	int rc;

	rc = kstrtoul(buf, 0, &val);
	if (rc)
		return rc;

	/*
	 * Ensure space-align is evenly divisible by the region
	 * interleave-width because the kernel typically has no facility
	 * to determine which DIMM(s), dimm-physical-addresses, would
	 * contribute to the tail capacity in system-physical-address
	 * space for the namespace.
	 */
	mappings = max_t(u32, 1, nd_region->ndr_mappings);
	dpa = div_u64_rem(val, mappings, &remainder);
	if (!is_power_of_2(dpa) || dpa < PAGE_SIZE
			|| val > region_size(nd_region) || remainder)
		return -EINVAL;
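
	/*
	 * Worked example (illustrative numbers): with a 4-way interleaved
	 * region, writing 16M yields dpa = 4M per DIMM with no remainder;
	 * 4M is a power of two and >= PAGE_SIZE, so it is accepted. Writing
	 * 12M yields dpa = 3M, which is not a power of two, so the store
	 * fails with -EINVAL.
	 */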
	/*
	 * Given that space allocation consults this value multiple
	 * times ensure it does not change for the duration of the
	 * allocation.
	 */
	nvdimm_bus_lock(dev);
	nd_region->align = val;
	nvdimm_bus_unlock(dev);

	return len;
}
/*
 * 32 should be enough for a while, even in the presence of socket
 * interleave a 32-way interleave set is a degenerate case.
 */
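/*
 * REGION_MAPPING() itself is not part of this excerpt. A sketch of its
 * likely shape, stamping out one read-only sysfs attribute per mapping
 * index (mappingN() is an assumed helper that formats mapping 'idx'):
 *
 *	#define REGION_MAPPING(idx)					\
 *	static ssize_t mapping##idx##_show(struct device *dev,		\
 *			struct device_attribute *attr, char *buf)	\
 *	{								\
 *		return mappingN(dev, buf, idx);				\
 *	}								\
 *	static DEVICE_ATTR_RO(mapping##idx)
 */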
REGION_MAPPING(0);
REGION_MAPPING(1);
REGION_MAPPING(2);
REGION_MAPPING(3);
REGION_MAPPING(4);
REGION_MAPPING(5);
REGION_MAPPING(6);
REGION_MAPPING(7);
REGION_MAPPING(8);
REGION_MAPPING(9);
REGION_MAPPING(10);
REGION_MAPPING(11);
REGION_MAPPING(12);
REGION_MAPPING(13);
REGION_MAPPING(14);
REGION_MAPPING(15);
REGION_MAPPING(16);
REGION_MAPPING(17);
REGION_MAPPING(18);
REGION_MAPPING(19);
REGION_MAPPING(20);
REGION_MAPPING(21);
REGION_MAPPING(22);
REGION_MAPPING(23);
REGION_MAPPING(24);
REGION_MAPPING(25);
REGION_MAPPING(26);
REGION_MAPPING(27);
REGION_MAPPING(28);
REGION_MAPPING(29);
REGION_MAPPING(30);
REGION_MAPPING(31);
static void nd_mapping_free_labels(struct nd_mapping *nd_mapping)
{
	struct nd_label_ent *label_ent, *e;

	lockdep_assert_held(&nd_mapping->lock);
	list_for_each_entry_safe(label_ent, e, &nd_mapping->labels, list) {
		list_del(&label_ent->list);
		kfree(label_ent);
	}
}
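
/*
 * As the lockdep assertion above implies, callers serialize on
 * nd_mapping->lock. A hypothetical caller:
 *
 *	mutex_lock(&nd_mapping->lock);
 *	nd_mapping_free_labels(nd_mapping);
 *	mutex_unlock(&nd_mapping->lock);
 */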
/*
 * When a namespace is activated create new seeds for the next
 * namespace, or namespace-personality to be configured.
 */
void nd_region_advance_seeds(struct nd_region *nd_region, struct device *dev)
{
	nvdimm_bus_lock(dev);
	if (nd_region->ns_seed == dev) {
		nd_region_create_ns_seed(nd_region);
	} else if (is_nd_btt(dev)) {
		struct nd_btt *nd_btt = to_nd_btt(dev);

		if (nd_region->btt_seed == dev)
			nd_region_create_btt_seed(nd_region);
		if (nd_region->ns_seed == &nd_btt->ndns->dev)
			nd_region_create_ns_seed(nd_region);
	} else if (is_nd_pfn(dev)) {
		struct nd_pfn *nd_pfn = to_nd_pfn(dev);

		if (nd_region->pfn_seed == dev)
			nd_region_create_pfn_seed(nd_region);
		if (nd_region->ns_seed == &nd_pfn->ndns->dev)
			nd_region_create_ns_seed(nd_region);
	} else if (is_nd_dax(dev)) {
		struct nd_dax *nd_dax = to_nd_dax(dev);

		if (nd_region->dax_seed == dev)
			nd_region_create_dax_seed(nd_region);
		if (nd_region->ns_seed == &nd_dax->nd_pfn.ndns->dev)
			nd_region_create_ns_seed(nd_region);
	}
	nvdimm_bus_unlock(dev);
}
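
/*
 * Illustrative call site (an assumption; the real caller is outside this
 * excerpt): once a namespace-personality device probes successfully, the
 * bus code advances the seeds so a fresh configuration target exists:
 *
 *	nd_region_advance_seeds(nd_region, dev);
 */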
/**
 * nd_region_acquire_lane - allocate and lock a lane
 * @nd_region: region id and number of lanes possible
 *
 * A lane correlates to a BLK-data-window and/or a log slot in the BTT.
 * We optimize for the common case where there are 256 lanes, one
 * per-cpu. For larger systems we need to lock to share lanes. For now
 * this implementation assumes the cost of maintaining an allocator for
 * free lanes is on the order of the lock hold time, so it implements a
 * static lane = cpu % num_lanes mapping.
 *
 * In the case of a BTT instance on top of a BLK namespace a lane may be
 * acquired recursively. We lock on the first instance.
 *
 * In the case of a BTT instance on top of PMEM, we only acquire a lane
 * for the BTT metadata updates.
 */
unsigned int nd_region_acquire_lane(struct nd_region *nd_region)
{
	unsigned int cpu, lane;

	migrate_disable();
	cpu = smp_processor_id();
	if (nd_region->num_lanes < nr_cpu_ids) {
		struct nd_percpu_lane *ndl_lock, *ndl_count;

		lane = cpu % nd_region->num_lanes;
		ndl_count = per_cpu_ptr(nd_region->lane, cpu);
		ndl_lock = per_cpu_ptr(nd_region->lane, lane);
		if (ndl_count->count++ == 0)
			spin_lock(&ndl_lock->lock);
	} else
		lane = cpu;

	return lane;
}
EXPORT_SYMBOL(nd_region_acquire_lane);
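
/*
 * Typical pairing (sketch): the lane is held for the duration of a BTT
 * metadata update and dropped via the matching helper,
 * nd_region_release_lane():
 *
 *	unsigned int lane = nd_region_acquire_lane(nd_region);
 *
 *	...update the BTT log slot for this lane...
 *
 *	nd_region_release_lane(nd_region, lane);
 */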
/*
 * PowerPC requires this alignment for memremap_pages(). All other archs
 * should be ok with SUBSECTION_SIZE (see memremap_compat_align()).
 */
#define MEMREMAP_COMPAT_ALIGN_MAX SZ_16M
int nvdimm_flush(struct nd_region *nd_region, struct bio *bio)
{
	int rc = 0;

	if (!nd_region->flush)
		rc = generic_nvdimm_flush(nd_region);
	else {
		if (nd_region->flush(nd_region, bio))
			rc = -EIO;
	}

	return rc;
}

/**
 * generic_nvdimm_flush() - flush any posted write queues between the cpu and pmem media
 * @nd_region: interleaved pmem region
 */
int generic_nvdimm_flush(struct nd_region *nd_region)
{
	struct nd_region_data *ndrd = dev_get_drvdata(&nd_region->dev);
	int i, idx;

	/*
	 * Try to encourage some diversity in flush hint addresses
	 * across cpus assuming a limited number of flush hints.
	 */
	idx = this_cpu_read(flush_idx);
	idx = this_cpu_add_return(flush_idx, hash_32(current->pid + idx, 8));

	/*
	 * The pmem_wmb() is needed to 'sfence' all previous writes such
	 * that they are architecturally visible for the platform buffer
	 * flush. Note that we've already arranged for pmem writes to
	 * avoid the cache via memcpy_flushcache(). The final wmb()
	 * ensures ordering for the NVDIMM flush write.
	 */
	pmem_wmb();
	for (i = 0; i < nd_region->ndr_mappings; i++)
		if (ndrd_get_flush_wpq(ndrd, i, 0))
			writeq(1, ndrd_get_flush_wpq(ndrd, i, idx));
	wmb();

	return 0;
}
EXPORT_SYMBOL_GPL(nvdimm_flush);
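
/*
 * Example flush trigger (a sketch of the block-driver side, which is not
 * part of this file): a pmem-style driver forwards REQ_PREFLUSH requests
 * to nvdimm_flush():
 *
 *	if (bio->bi_opf & REQ_PREFLUSH)
 *		ret = nvdimm_flush(nd_region, bio);
 */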
/**
 * nvdimm_has_flush - determine write flushing requirements
 * @nd_region: interleaved pmem region
 *
 * Returns 1 if writes require flushing
 * Returns 0 if writes do not require flushing
 * Returns -ENXIO if flushing capability cannot be determined
 */
int nvdimm_has_flush(struct nd_region *nd_region)
{
	int i;

	/* no nvdimm or pmem api == flushing capability unknown */
	if (nd_region->ndr_mappings == 0
			|| !IS_ENABLED(CONFIG_ARCH_HAS_PMEM_API))
		return -ENXIO;

	/* Test if an explicit flush function is defined */
	if (test_bit(ND_REGION_ASYNC, &nd_region->flags) && nd_region->flush)
		return 1;

	/* Test if any flush hints for the region are available */
	for (i = 0; i < nd_region->ndr_mappings; i++) {
		struct nd_mapping *nd_mapping = &nd_region->mapping[i];
		struct nvdimm *nvdimm = nd_mapping->nvdimm;

		/* flush hints present / available */
		if (nvdimm->num_flush)
			return 1;
	}

	/*
	 * The platform defines dimm devices with neither flush hints nor
	 * an explicit flush callback, so assume a platform persistence
	 * mechanism like ADR.
	 */
	return 0;
}
EXPORT_SYMBOL_GPL(nvdimm_has_flush);
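
/*
 * Example consumer (a sketch; the warning text is an assumption modeled
 * on the pmem driver): callers treat a negative return as "persistence
 * guarantees unknown":
 *
 *	if (nvdimm_has_flush(nd_region) < 0)
 *		dev_warn(dev, "unable to guarantee persistence of writes\n");
 */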