/*
 * Struct holding a single performance metric as exchanged with the
 * hypervisor: an 8-byte ASCII identifier followed by a big-endian
 * 64-bit value. __packed because the layout is part of the buffer
 * format shared with PHYP (see papr_scm_perf_stats below).
 */
struct papr_scm_perf_stat {
	u8 stat_id[8];		/* 8-char ASCII stat id, e.g. "MemLife " — not NUL-terminated */
	__be64 stat_val;	/* metric value, big-endian */
} __packed;
/*
 * Struct exchanged between kernel and PHYP for fetching drc perf stats.
 * NOTE: line-joining in this excerpt had shifted each field comment onto
 * the previous field; they are re-aligned to the fields they describe.
 */
struct papr_scm_perf_stats {
	u8 eye_catcher[8];	/* should be PAPR_SCM_PERF_STATS_EYECATCHER */
	__be32 stats_version;	/* should be PAPR_SCM_PERF_STATS_VERSION */
	__be32 num_statistics;	/* number of stats following */
	/* zero or more performance metrics */
	struct papr_scm_perf_stat scm_statistic[];
} __packed;
/* private struct associated with each region */ struct papr_scm_priv { struct platform_device *pdev; struct device_node *dn;
uint32_t drc_index;
uint64_t blocks;
uint64_t block_size; int metadata_size; bool is_volatile; bool hcall_flush_required;
/* Check if we are stalled for some time */ if (H_IS_LONG_BUSY(rc)) {
msleep(get_longbusy_msecs(rc));
rc = H_BUSY;
} elseif (rc == H_BUSY) {
cond_resched();
}
} while (rc == H_BUSY);
/* * When the hypervisor cannot map all the requested memory in a single * hcall it returns H_BUSY and we call again with the token until * we get H_SUCCESS. Aborting the retry loop before getting H_SUCCESS * leave the system in an undefined state, so we wait.
*/
token = 0;
do {
rc = plpar_hcall(H_SCM_BIND_MEM, ret, p->drc_index, 0,
p->blocks, BIND_ANY_ADDR, token);
token = ret[0]; if (!saved)
saved = ret[1];
cond_resched();
} while (rc == H_BUSY);
/* NB: unbind has the same retry requirements as drc_pmem_bind() */ do {
/* Unbind of all SCM resources associated with drcIndex */
rc = plpar_hcall(H_SCM_UNBIND_ALL, ret, H_UNBIND_SCOPE_DRC,
p->drc_index, token);
token = ret[0];
/* Check if we are stalled for some time */ if (H_IS_LONG_BUSY(rc)) {
msleep(get_longbusy_msecs(rc));
rc = H_BUSY;
} elseif (rc == H_BUSY) {
cond_resched();
}
/* Make sure the full region is bound. */
rc = plpar_hcall(H_SCM_QUERY_BLOCK_MEM_BINDING, ret,
p->drc_index, p->blocks - 1); if (rc) goto err_out;
end_addr = ret[0];
err_out:
dev_info(&p->pdev->dev, "Failed to query, trying an unbind followed by bind");
drc_pmem_unbind(p); return drc_pmem_bind(p);
}
/* * Query the Dimm performance stats from PHYP and copy them (if returned) to * provided struct papr_scm_perf_stats instance 'stats' that can hold atleast * (num_stats + header) bytes. * - If buff_stats == NULL the return value is the size in bytes of the buffer * needed to hold all supported performance-statistics. * - If buff_stats != NULL and num_stats == 0 then we copy all known * performance-statistics to 'buff_stat' and expect to be large enough to * hold them. * - if buff_stats != NULL and num_stats > 0 then copy the requested * performance-statistics to buff_stats.
*/ static ssize_t drc_pmem_query_stats(struct papr_scm_priv *p, struct papr_scm_perf_stats *buff_stats, unsignedint num_stats)
{ unsignedlong ret[PLPAR_HCALL_BUFSIZE];
size_t size;
s64 rc;
/* Setup the out buffer */ if (buff_stats) {
memcpy(buff_stats->eye_catcher,
PAPR_SCM_PERF_STATS_EYECATCHER, 8);
buff_stats->stats_version =
cpu_to_be32(PAPR_SCM_PERF_STATS_VERSION);
buff_stats->num_statistics =
cpu_to_be32(num_stats);
/* * Calculate the buffer size based on num-stats provided * or use the prefetched max buffer length
*/ if (num_stats) /* Calculate size from the num_stats */
size = sizeof(struct papr_scm_perf_stats) +
num_stats * sizeof(struct papr_scm_perf_stat); else
size = p->stat_buffer_len;
} else { /* In case of no out buffer ignore the size */
size = 0;
}
/* Do the HCALL asking PHYP for info */
rc = plpar_hcall(H_SCM_PERFORMANCE_STATS, ret, p->drc_index,
buff_stats ? virt_to_phys(buff_stats) : 0,
size);
/* Check if the error was due to an unknown stat-id */ if (rc == H_PARTIAL) {
dev_err(&p->pdev->dev, "Unknown performance stats, Err:0x%016lX\n", ret[0]); return -ENOENT;
} elseif (rc == H_AUTHORITY) {
dev_info(&p->pdev->dev, "Permission denied while accessing performance stats"); return -EPERM;
} elseif (rc == H_UNSUPPORTED) {
dev_dbg(&p->pdev->dev, "Performance stats unsupported\n"); return -EOPNOTSUPP;
} elseif (rc != H_SUCCESS) {
dev_err(&p->pdev->dev, "Failed to query performance stats, Err:%lld\n", rc); return -EIO;
} elseif (!size) { /* Handle case where stat buffer size was requested */
dev_dbg(&p->pdev->dev, "Performance stats size %ld\n", ret[0]); return ret[0];
}
/* Successfully fetched the requested stats from phyp */
dev_dbg(&p->pdev->dev, "Performance stats returned %d stats\n",
be32_to_cpu(buff_stats->num_statistics)); return 0;
}
/*
 * Issue hcall to retrieve dimm health info and populate papr_scm_priv with the
 * health information.
 *
 * Returns 0 on success (also refreshing p->lasthealth_jiffies and the cached
 * p->health_bitmap), or -ENXIO when the hcall fails with an unexpected status.
 * H_FUNCTION (hcall unimplemented) is treated as an empty health bitmap.
 */
static int __drc_pmem_query_health(struct papr_scm_priv *p)
{
	unsigned long ret[PLPAR_HCALL_BUFSIZE];
	u64 bitmap = 0;
	long rc;

	/* issue the hcall */
	rc = plpar_hcall(H_SCM_HEALTH, ret, p->drc_index);
	if (rc == H_SUCCESS) {
		/* ret[0] is the health bitmap, ret[1] the valid-bits mask */
		bitmap = ret[0] & ret[1];
	} else if (rc == H_FUNCTION) {
		dev_info_once(&p->pdev->dev,
			      "Hcall H_SCM_HEALTH not implemented, assuming empty health bitmap");
	} else {
		dev_err(&p->pdev->dev,
			"Failed to query health information, Err:%ld\n", rc);
		return -ENXIO;
	}

	p->lasthealth_jiffies = jiffies;

	/* Allow injecting specific health bits via inject mask. */
	if (p->health_bitmap_inject_mask)
		bitmap = (bitmap & ~p->health_bitmap_inject_mask) |
			p->health_bitmap_inject_mask;

	WRITE_ONCE(p->health_bitmap, bitmap);
	dev_dbg(&p->pdev->dev,
		"Queried dimm health info. Bitmap:0x%016lx Mask:0x%016lx\n",
		ret[0], ret[1]);

	return 0;
}
/* Min interval in seconds for assuming stable dimm health */
#define MIN_HEALTH_QUERY_INTERVAL 60
/* Query cached health info and if needed call drc_pmem_query_health */ staticint drc_pmem_query_health(struct papr_scm_priv *p)
{ unsignedlong cache_timeout; int rc;
/* Protect concurrent modifications to papr_scm_priv */
rc = mutex_lock_interruptible(&p->health_mutex); if (rc) return rc;
/* Jiffies offset for which the health data is assumed to be same */
cache_timeout = p->lasthealth_jiffies +
secs_to_jiffies(MIN_HEALTH_QUERY_INTERVAL);
/* Fetch new health info is its older than MIN_HEALTH_QUERY_INTERVAL */ if (time_after(jiffies, cache_timeout))
rc = __drc_pmem_query_health(p); else /* Assume cached health data is valid */
rc = 0;
if (len >= 8) {
data = *(uint64_t *)(hdr->in_buf + data_offset);
data_be = cpu_to_be64(data);
wrote = 8;
} elseif (len >= 4) {
data = *(uint32_t *)(hdr->in_buf + data_offset);
data &= 0xffffffff;
data_be = cpu_to_be32(data);
wrote = 4;
} elseif (len >= 2) {
data = *(uint16_t *)(hdr->in_buf + data_offset);
data &= 0xffff;
data_be = cpu_to_be16(data);
wrote = 2;
} else {
data_be = *(uint8_t *)(hdr->in_buf + data_offset);
data_be &= 0xff;
wrote = 1;
}
ret = plpar_hcall_norets(H_SCM_WRITE_METADATA, p->drc_index,
offset, data_be, wrote); if (ret == H_PARAMETER) /* bad DRC index */ return -ENODEV; if (ret) return -EINVAL; /* other invalid parameter */
}
return 0;
}
/*
 * Do a sanity checks on the inputs args to dimm-control function and return
 * '0' if valid. Validation of PDSM payloads happens later in
 * papr_scm_service_pdsm.
 *
 * NOTE(review): this excerpt appears to be missing interior lines — 'pdsm'
 * and 'nd_cmd' are read below without any visible assignment, 'cmd_mask' is
 * declared but never visibly tested, and the closing brace before the final
 * return closes a block whose opening is not visible here (presumably an
 * 'if (cmd == ND_CMD_CALL)' envelope-validation block — confirm against the
 * full file before relying on this text).
 */ staticint is_cmd_valid(struct nvdimm *nvdimm, unsignedint cmd, void *buf, unsignedint buf_len)
{ unsignedlong cmd_mask = PAPR_SCM_DIMM_CMD_MASK; struct nd_cmd_pkg *nd_cmd; struct papr_scm_priv *p; enum papr_pdsm pdsm;
/* Only dimm-specific calls are supported atm */ if (!nvdimm) return -EINVAL;
/* get the provider data from struct nvdimm */
p = nvdimm_provider_data(nvdimm);
/* Verify if the pdsm command is valid */ if (pdsm <= PAPR_PDSM_MIN || pdsm >= PAPR_PDSM_MAX) {
dev_dbg(&p->pdev->dev, "PDSM[0x%x]: Invalid PDSM\n",
pdsm); return -EINVAL;
}
/* Have enough space to hold returned 'nd_pkg_pdsm' header */ if (nd_cmd->nd_size_out < ND_PDSM_HDR_SIZE) {
dev_dbg(&p->pdev->dev, "PDSM[0x%x]: Invalid payload\n",
pdsm); return -EINVAL;
}
}
/* Let the command be further processed */ return 0;
}
staticint papr_pdsm_fuel_gauge(struct papr_scm_priv *p, union nd_pdsm_payload *payload)
{ int rc, size;
u64 statval; struct papr_scm_perf_stat *stat; struct papr_scm_perf_stats *stats;
/* Silently fail if fetching performance metrics isn't supported */ if (!p->stat_buffer_len) return 0;
/* Allocate request buffer enough to hold single performance stat */
size = sizeof(struct papr_scm_perf_stats) + sizeof(struct papr_scm_perf_stat);
stats = kzalloc(size, GFP_KERNEL); if (!stats) return -ENOMEM;
stat = &stats->scm_statistic[0];
memcpy(&stat->stat_id, "MemLife ", sizeof(stat->stat_id));
stat->stat_val = 0;
/* Fetch the fuel gauge and populate it in payload */
rc = drc_pmem_query_stats(p, stats, 1); if (rc < 0) {
dev_dbg(&p->pdev->dev, "Err(%d) fetching fuel gauge\n", rc); goto free_stats;
}
/* Add the dirty-shutdown-counter value to the pdsm */ staticint papr_pdsm_dsc(struct papr_scm_priv *p, union nd_pdsm_payload *payload)
{
payload->health.extension_flags |= PDSM_DIMM_DSC_VALID;
payload->health.dimm_dsc = p->dirty_shutdown_counter;
returnsizeof(struct nd_papr_pdsm_health);
}
/* Fetch the DIMM health info and populate it in provided package. */ staticint papr_pdsm_health(struct papr_scm_priv *p, union nd_pdsm_payload *payload)
{ int rc;
/* Ensure dimm health mutex is taken preventing concurrent access */
rc = mutex_lock_interruptible(&p->health_mutex); if (rc) goto out;
/* Always fetch upto date dimm health data ignoring cached values */
rc = __drc_pmem_query_health(p); if (rc) {
mutex_unlock(&p->health_mutex); goto out;
}
/* Update field dimm_health based on health_bitmap flags */ if (p->health_bitmap & PAPR_PMEM_HEALTH_FATAL)
payload->health.dimm_health = PAPR_PDSM_DIMM_FATAL; elseif (p->health_bitmap & PAPR_PMEM_HEALTH_CRITICAL)
payload->health.dimm_health = PAPR_PDSM_DIMM_CRITICAL; elseif (p->health_bitmap & PAPR_PMEM_HEALTH_UNHEALTHY)
payload->health.dimm_health = PAPR_PDSM_DIMM_UNHEALTHY;
/* struct populated hence can release the mutex now */
mutex_unlock(&p->health_mutex);
/* Populate the fuel gauge meter in the payload */
papr_pdsm_fuel_gauge(p, payload); /* Populate the dirty-shutdown-counter field */
papr_pdsm_dsc(p, payload);
rc = sizeof(struct nd_papr_pdsm_health);
out: return rc;
}
/* Inject a smart error Add the dirty-shutdown-counter value to the pdsm */ staticint papr_pdsm_smart_inject(struct papr_scm_priv *p, union nd_pdsm_payload *payload)
{ int rc;
u32 supported_flags = 0;
u64 inject_mask = 0, clear_mask = 0;
u64 mask;
/* Check for individual smart error flags and update inject/clear masks */ if (payload->smart_inject.flags & PDSM_SMART_INJECT_HEALTH_FATAL) {
supported_flags |= PDSM_SMART_INJECT_HEALTH_FATAL; if (payload->smart_inject.fatal_enable)
inject_mask |= PAPR_PMEM_HEALTH_FATAL; else
clear_mask |= PAPR_PMEM_HEALTH_FATAL;
}
if (payload->smart_inject.flags & PDSM_SMART_INJECT_BAD_SHUTDOWN) {
supported_flags |= PDSM_SMART_INJECT_BAD_SHUTDOWN; if (payload->smart_inject.unsafe_shutdown_enable)
inject_mask |= PAPR_PMEM_SHUTDOWN_DIRTY; else
clear_mask |= PAPR_PMEM_SHUTDOWN_DIRTY;
}
/* Prevent concurrent access to dimm health bitmap related members */
rc = mutex_lock_interruptible(&p->health_mutex); if (rc) return rc;
/* Use inject/clear masks to set health_bitmap_inject_mask */
mask = READ_ONCE(p->health_bitmap_inject_mask);
mask = (mask & ~clear_mask) | inject_mask;
WRITE_ONCE(p->health_bitmap_inject_mask, mask);
/* Invalidate cached health bitmap */
p->lasthealth_jiffies = 0;
mutex_unlock(&p->health_mutex);
/* Return the supported flags back to userspace */
payload->smart_inject.flags = supported_flags;
returnsizeof(struct nd_papr_pdsm_health);
}
/*
 * 'struct pdsm_cmd_desc'
 * Identifies supported PDSMs' expected length of in/out payloads
 * and pdsm service function.
 *
 * size_in	: Size of input payload if any in the PDSM request.
 * size_out	: Size of output payload if any in the PDSM request.
 * service	: Service function for the PDSM request. Return semantics:
 *		  rc < 0 : Error servicing PDSM and rc indicates the error.
 *		  rc >= 0: Serviced successfully and 'rc' indicate number of
 *			   bytes written to payload.
 */
struct pdsm_cmd_desc {
	u32 size_in;
	u32 size_out;
	int (*service)(struct papr_scm_priv *dimm,
		       union nd_pdsm_payload *payload);
};
/* Holds all supported PDSMs' command descriptors */ staticconststruct pdsm_cmd_desc __pdsm_cmd_descriptors[] = {
[PAPR_PDSM_MIN] = {
.size_in = 0,
.size_out = 0,
.service = NULL,
}, /* New PDSM command descriptors to be added below */
/* Given a valid pdsm cmd return its command descriptor else return NULL */ staticinlineconststruct pdsm_cmd_desc *pdsm_cmd_desc(enum papr_pdsm cmd)
{ if (cmd >= 0 || cmd < ARRAY_SIZE(__pdsm_cmd_descriptors)) return &__pdsm_cmd_descriptors[cmd];
return NULL;
}
/* * For a given pdsm request call an appropriate service function. * Returns errors if any while handling the pdsm command package.
*/ staticint papr_scm_service_pdsm(struct papr_scm_priv *p, struct nd_cmd_pkg *pkg)
{ /* Get the PDSM header and PDSM command */ struct nd_pkg_pdsm *pdsm_pkg = (struct nd_pkg_pdsm *)pkg->nd_payload; enum papr_pdsm pdsm = (enum papr_pdsm)pkg->nd_command; conststruct pdsm_cmd_desc *pdsc; int rc;
/* Fetch corresponding pdsm descriptor for validation and servicing */
pdsc = pdsm_cmd_desc(pdsm);
/* Validate pdsm descriptor */ /* Ensure that reserved fields are 0 */ if (pdsm_pkg->reserved[0] || pdsm_pkg->reserved[1]) {
dev_dbg(&p->pdev->dev, "PDSM[0x%x]: Invalid reserved field\n",
pdsm); return -EINVAL;
}
/* If pdsm expects some input, then ensure that the size_in matches */ if (pdsc->size_in &&
pkg->nd_size_in != (pdsc->size_in + ND_PDSM_HDR_SIZE)) {
dev_dbg(&p->pdev->dev, "PDSM[0x%x]: Mismatched size_in=%d\n",
pdsm, pkg->nd_size_in); return -EINVAL;
}
/* If pdsm wants to return data, then ensure that size_out matches */ if (pdsc->size_out &&
pkg->nd_size_out != (pdsc->size_out + ND_PDSM_HDR_SIZE)) {
dev_dbg(&p->pdev->dev, "PDSM[0x%x]: Mismatched size_out=%d\n",
pdsm, pkg->nd_size_out); return -EINVAL;
}
/* Service the pdsm */ if (pdsc->service) {
dev_dbg(&p->pdev->dev, "PDSM[0x%x]: Servicing..\n", pdsm);
/* Allocate the buffer for phyp where stats are written */
stats = kzalloc(p->stat_buffer_len, GFP_KERNEL); if (!stats) return -ENOMEM;
/* Ask phyp to return all dimm perf stats */
rc = drc_pmem_query_stats(p, stats, 0); if (rc) goto free_stats; /* * Go through the returned output buffer and print stats and * values. Since stat_id is essentially a char string of * 8 bytes, simply use the string format specifier to print it.
*/
seq_buf_init(&s, buf, PAGE_SIZE); for (index = 0, stat = stats->scm_statistic;
index < be32_to_cpu(stats->num_statistics);
++index, ++stat) {
seq_buf_printf(&s, "%.8s = 0x%016llX\n",
stat->stat_id,
be64_to_cpu(stat->stat_val));
}
if (evt->error_type != MCE_ERROR_TYPE_UE) return NOTIFY_DONE;
if (list_empty(&papr_nd_regions)) return NOTIFY_DONE;
/* * The physical address obtained here is PAGE_SIZE aligned, so get the * exact address from the effective address
*/
phys_addr = evt->u.ue_error.physical_address +
(evt->u.ue_error.effective_address & ~PAGE_MASK);
if (!evt->u.ue_error.physical_address_provided ||
!is_zone_device_page(pfn_to_page(phys_addr >> PAGE_SHIFT))) return NOTIFY_DONE;
/* mce notifier is called from a process context, so mutex is safe */
mutex_lock(&papr_ndr_lock);
list_for_each_entry(p, &papr_nd_regions, region_list) { if (phys_addr >= p->res.start && phys_addr <= p->res.end) {
found = true; break;
}
}
if (found)
papr_scm_add_badblock(p->region, p->bus, phys_addr);
/* check we have all the required DT properties */ if (of_property_read_u32(dn, "ibm,my-drc-index", &drc_index)) {
dev_err(&pdev->dev, "%pOF: missing drc-index!\n", dn); return -ENODEV;
}
/* * open firmware platform device create won't update the NUMA * distance table. For PAPR SCM devices we use numa_map_to_online_node() * to find the nearest online NUMA node and that requires correct * distance table information.
*/
update_numa_distance(dn);
p = kzalloc(sizeof(*p), GFP_KERNEL); if (!p) return -ENOMEM;
/* Initialize the dimm mutex */
mutex_init(&p->health_mutex);
if (of_property_read_u64(dn, "ibm,persistence-failed-count",
&p->dirty_shutdown_counter))
p->dirty_shutdown_counter = 0;
/* We just need to ensure that set cookies are unique across */
uuid_parse(uuid_str, &uuid);
/* * The cookie1 and cookie2 are not really little endian. * We store a raw buffer representation of the * uuid string so that we can compare this with the label * area cookie irrespective of the endian configuration * with which the kernel is built. * * Historically we stored the cookie in the below format. * for a uuid string 72511b67-0b3b-42fd-8d1d-5be3cae8bcaa * cookie1 was 0xfd423b0b671b5172 * cookie2 was 0xaabce8cae35b1d8d
*/
export_uuid(uuid_raw, &uuid);
p->nd_set.cookie1 = get_unaligned_le64(&uuid_raw[0]);
p->nd_set.cookie2 = get_unaligned_le64(&uuid_raw[8]);
/* might be zero */
p->metadata_size = metadata_size;
p->pdev = pdev;
/* request the hypervisor to bind this region to somewhere in memory */
rc = drc_pmem_bind(p);
/* If phyp says drc memory still bound then force unbound and retry */ if (rc == H_OVERLAP)
rc = drc_pmem_query_n_bind(p);
/* setup the resource for the newly bound range */
p->res.start = p->bound_addr;
p->res.end = p->bound_addr + p->blocks * p->block_size - 1;
p->res.name = pdev->name;
p->res.flags = IORESOURCE_MEM;
/* Try retrieving the stat buffer and see if its supported */
stat_size = drc_pmem_query_stats(p, NULL, 0); if (stat_size > 0) {
p->stat_buffer_len = stat_size;
dev_dbg(&p->pdev->dev, "Max perf-stat size %lu-bytes\n",
p->stat_buffer_len);
}
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit noch Richtigkeit
noch Qualität der bereitgestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.