/*
 * Note: all code and data in this file are protected by
 * ifs_sem. On HT systems all threads on a core will
 * execute together, but only the first thread on the
 * core will update the results of the test.
 */
/* Max retries on the same chunk */ #define MAX_IFS_RETRIES 5
/*
 * Argument block passed (as void *) to doscan() on every thread of the
 * core under test.
 */
struct run_params {
	struct ifs_data *ifsd;
	/* Value doscan() writes to MSR_ACTIVATE_SCAN */
	union ifs_scan *activate;
	/* MSR_SCAN_STATUS contents, stored by the first thread of the core */
	union ifs_status status;
};
/*
 * Argument block passed (as void *) to dosbaf() on every thread of the
 * core under test.
 */
struct sbaf_run_params {
	/* dosbaf() reads ifsd->cur_batch for its trace event */
	struct ifs_data *ifsd;
	/* Not touched by dosbaf(); presumably maintained by the caller - confirm */
	int *retry_cnt;
	/* Value dosbaf() writes to MSR_ACTIVATE_SBAF */
	union ifs_sbaf *activate;
	/* MSR_SBAF_STATUS contents, stored by the first thread of the core */
	union ifs_sbaf_status status;
};
/* * Number of TSC cycles that a logical CPU will wait for the other * logical CPU on the core in the WRMSR(ACTIVATE_SCAN).
*/ #define IFS_THREAD_WAIT 100000
staticconstchar * const scan_test_status[] = {
[IFS_NO_ERROR] = "SCAN no error",
[IFS_OTHER_THREAD_COULD_NOT_JOIN] = "Other thread could not join.",
[IFS_INTERRUPTED_BEFORE_RENDEZVOUS] = "Interrupt occurred prior to SCAN coordination.",
[IFS_POWER_MGMT_INADEQUATE_FOR_SCAN] = "Core Abort SCAN Response due to power management condition.",
[IFS_INVALID_CHUNK_RANGE] = "Non valid chunks in the range",
[IFS_MISMATCH_ARGUMENTS_BETWEEN_THREADS] = "Mismatch in arguments between threads T0/T1.",
[IFS_CORE_NOT_CAPABLE_CURRENTLY] = "Core not capable of performing SCAN currently",
[IFS_UNASSIGNED_ERROR_CODE] = "Unassigned error code 0x7",
[IFS_EXCEED_NUMBER_OF_THREADS_CONCURRENT] = "Exceeded number of Logical Processors (LP) allowed to run Scan-At-Field concurrently",
[IFS_INTERRUPTED_DURING_EXECUTION] = "Interrupt occurred prior to SCAN start",
[IFS_UNASSIGNED_ERROR_CODE_0xA] = "Unassigned error code 0xA",
[IFS_CORRUPTED_CHUNK] = "Scan operation aborted due to corrupted image. Try reloading",
};
staticvoid message_not_tested(struct device *dev, int cpu, union ifs_status status)
{ struct ifs_data *ifsd = ifs_get_data(dev);
/* * control_error is set when the microcode runs into a problem * loading the image from the reserved BIOS memory, or it has * been corrupted. Reloading the image may fix this issue.
*/ if (status.control_error) {
dev_warn(dev, "CPU(s) %*pbl: Scan controller error. Batch: %02x version: 0x%x\n",
cpumask_pr_args(cpu_smt_mask(cpu)), ifsd->cur_batch, ifsd->loaded_version); return;
}
if (status.error_code < ARRAY_SIZE(scan_test_status)) {
dev_info(dev, "CPU(s) %*pbl: SCAN operation did not start. %s\n",
cpumask_pr_args(cpu_smt_mask(cpu)),
scan_test_status[status.error_code]);
} elseif (status.error_code == IFS_SW_TIMEOUT) {
dev_info(dev, "CPU(s) %*pbl: software timeout during scan\n",
cpumask_pr_args(cpu_smt_mask(cpu)));
} elseif (status.error_code == IFS_SW_PARTIAL_COMPLETION) {
dev_info(dev, "CPU(s) %*pbl: %s\n",
cpumask_pr_args(cpu_smt_mask(cpu)), "Not all scan chunks were executed. Maximum forward progress retries exceeded");
} else {
dev_info(dev, "CPU(s) %*pbl: SCAN unknown status %llx\n",
cpumask_pr_args(cpu_smt_mask(cpu)), status.data);
}
}
/*
 * Log a scan test failure.
 *
 * @dev:    IFS device used for logging and to look up the driver data
 * @cpu:    CPU that was targeted; the message names all of its SMT siblings
 * @status: scan status to decode
 *
 * Only a signature error produces output; any other status is silent here.
 */
static void message_fail(struct device *dev, int cpu, union ifs_status status)
{
	struct ifs_data *ifsd = ifs_get_data(dev);

	/*
	 * signature_error is set when the output from the scan chains does not
	 * match the expected signature. This might be a transient problem (e.g.
	 * due to a bit flip from an alpha particle or neutron). If the problem
	 * repeats on a subsequent test, then it indicates an actual problem in
	 * the core being tested.
	 */
	if (status.signature_error) {
		dev_err(dev, "CPU(s) %*pbl: test signature incorrect. Batch: %02x version: 0x%x\n",
			cpumask_pr_args(cpu_smt_mask(cpu)), ifsd->cur_batch, ifsd->loaded_version);
	}
}
/* Signature for chunk is bad, or scan test failed */ if (status.signature_error || status.control_error) returnfalse;
switch (err_code) { case IFS_NO_ERROR: case IFS_OTHER_THREAD_COULD_NOT_JOIN: case IFS_INTERRUPTED_BEFORE_RENDEZVOUS: case IFS_POWER_MGMT_INADEQUATE_FOR_SCAN: case IFS_EXCEED_NUMBER_OF_THREADS_CONCURRENT: case IFS_INTERRUPTED_DURING_EXECUTION: returntrue; case IFS_INVALID_CHUNK_RANGE: case IFS_MISMATCH_ARGUMENTS_BETWEEN_THREADS: case IFS_CORE_NOT_CAPABLE_CURRENTLY: case IFS_UNASSIGNED_ERROR_CODE: case IFS_UNASSIGNED_ERROR_CODE_0xA: case IFS_CORRUPTED_CHUNK: break;
} returnfalse;
}
/*
 * Simplified cpu sibling rendezvous loop based on microcode loader
 * __wait_for_cpus().
 *
 * @t:       shared arrival counter; incremented once by each caller
 * @timeout: spin budget, consumed SPINUNIT at a time via ndelay()
 *
 * Returns once the counter has reached the number of CPUs in this CPU's
 * SMT mask, or silently once less than one SPINUNIT of budget remains.
 * The caller gets no indication of which of the two happened.
 */
static void wait_for_sibling_cpu(atomic_t *t, long long timeout)
{
	int cpu = smp_processor_id();
	const struct cpumask *smt_mask = cpu_smt_mask(cpu);
	int all_cpus = cpumask_weight(smt_mask);

	atomic_inc(t);
	while (atomic_read(t) < all_cpus) {
		if (timeout < SPINUNIT)
			return;
		ndelay(SPINUNIT);
		timeout -= SPINUNIT;
		/* Touch the NMI watchdog on every spin iteration */
		touch_nmi_watchdog();
	}
}
/*
 * Execute the scan. Called "simultaneously" on all threads of a core
 * at high priority using the stop_cpus mechanism.
 *
 * @data: pointer to the struct run_params shared by all threads of the core
 *
 * Always returns 0; the scan outcome is passed back in params->status.
 *
 * NOTE(review): dosbaf() calls wait_for_sibling_cpu() before its WRMSR.
 * No equivalent rendezvous (or counter for it) is visible for the scan
 * path here - confirm whether one is intended.
 */
static int doscan(void *data)
{
	struct run_params *params = data;
	int cpu = smp_processor_id();
	union ifs_status status;
	int first;

	/* Only the first logical CPU on a core reports result */
	first = cpumask_first(cpu_smt_mask(cpu));

	/*
	 * This WRMSR will wait for other HT threads to also write
	 * to this MSR (at most for activate.delay cycles). Then it
	 * starts scan of each requested chunk. The core scan happens
	 * during the "execution" of the WRMSR. This instruction can
	 * take up to 200 milliseconds (in the case where all chunks
	 * are processed in a single pass) before it retires.
	 */
	wrmsrq(MSR_ACTIVATE_SCAN, params->activate->data);
	rdmsrq(MSR_SCAN_STATUS, status.data);

	/* Pass back the result of the scan */
	if (cpu == first)
		params->status = status;

	return 0;
}
/* * Use stop_core_cpuslocked() to synchronize writing to MSR_ACTIVATE_SCAN * on all threads of the core to be tested. Loop if necessary to complete * run of all chunks. Include some defensive tests to make sure forward * progress is made, and that the whole test completes in a reasonable time.
*/ staticvoid ifs_test_core(int cpu, struct device *dev)
{ union ifs_status status = {}; union ifs_scan activate; unsignedlong timeout; struct ifs_data *ifsd; int to_start, to_stop; int status_chunk; struct run_params params; int retries;
staticconstchar * const sbaf_test_status[] = {
[IFS_SBAF_NO_ERROR] = "SBAF no error",
[IFS_SBAF_OTHER_THREAD_COULD_NOT_JOIN] = "Other thread could not join.",
[IFS_SBAF_INTERRUPTED_BEFORE_RENDEZVOUS] = "Interrupt occurred prior to SBAF coordination.",
[IFS_SBAF_UNASSIGNED_ERROR_CODE3] = "Unassigned error code 0x3",
[IFS_SBAF_INVALID_BUNDLE_INDEX] = "Non-valid sbaf bundles. Reload test image",
[IFS_SBAF_MISMATCH_ARGS_BETWEEN_THREADS] = "Mismatch in arguments between threads T0/T1.",
[IFS_SBAF_CORE_NOT_CAPABLE_CURRENTLY] = "Core not capable of performing SBAF currently",
[IFS_SBAF_UNASSIGNED_ERROR_CODE7] = "Unassigned error code 0x7",
[IFS_SBAF_EXCEED_NUMBER_OF_THREADS_CONCURRENT] = "Exceeded number of Logical Processors (LP) allowed to run Scan-At-Field concurrently",
[IFS_SBAF_INTERRUPTED_DURING_EXECUTION] = "Interrupt occurred prior to SBAF start",
[IFS_SBAF_INVALID_PROGRAM_INDEX] = "SBAF program index not valid",
[IFS_SBAF_CORRUPTED_CHUNK] = "SBAF operation aborted due to corrupted chunk",
[IFS_SBAF_DID_NOT_START] = "SBAF operation did not start",
};
/*
 * Log why an SBAF test produced no pass/fail verdict.
 *
 * @dev:         IFS device used for logging
 * @cpu:         CPU that was targeted; messages name all of its SMT siblings
 * @status_data: raw 64-bit status, reinterpreted as union ifs_sbaf_status
 *
 * The error code is translated through sbaf_test_status[], or matched
 * against the software-defined codes IFS_SW_TIMEOUT and
 * IFS_SW_PARTIAL_COMPLETION; anything else is printed as a raw status value.
 */
static void sbaf_message_not_tested(struct device *dev, int cpu, u64 status_data)
{
	union ifs_sbaf_status status = (union ifs_sbaf_status)status_data;

	if (status.error_code < ARRAY_SIZE(sbaf_test_status)) {
		dev_info(dev, "CPU(s) %*pbl: SBAF operation did not start. %s\n",
			 cpumask_pr_args(cpu_smt_mask(cpu)),
			 sbaf_test_status[status.error_code]);
	} else if (status.error_code == IFS_SW_TIMEOUT) {
		dev_info(dev, "CPU(s) %*pbl: software timeout during scan\n",
			 cpumask_pr_args(cpu_smt_mask(cpu)));
	} else if (status.error_code == IFS_SW_PARTIAL_COMPLETION) {
		dev_info(dev, "CPU(s) %*pbl: %s\n",
			 cpumask_pr_args(cpu_smt_mask(cpu)),
			 "Not all SBAF bundles executed. Maximum forward progress retries exceeded");
	} else {
		dev_info(dev, "CPU(s) %*pbl: SBAF unknown status %llx\n",
			 cpumask_pr_args(cpu_smt_mask(cpu)), status.data);
	}
}
/*
 * Log an SBAF test failure.
 *
 * @dev:    IFS device used for logging
 * @cpu:    CPU that was targeted; messages name all of its SMT siblings
 * @status: SBAF status to decode
 *
 * Only SBAF_STATUS_SIGN_FAIL and SBAF_STATUS_TEST_FAIL produce output.
 */
static void sbaf_message_fail(struct device *dev, int cpu, union ifs_sbaf_status status)
{
	/* Failed signature check is set when SBAF signature did not match the expected value */
	if (status.sbaf_status == SBAF_STATUS_SIGN_FAIL) {
		dev_err(dev, "CPU(s) %*pbl: Failed signature check\n",
			cpumask_pr_args(cpu_smt_mask(cpu)));
	}

	/* Failed to reach end of test */
	if (status.sbaf_status == SBAF_STATUS_TEST_FAIL) {
		dev_err(dev, "CPU(s) %*pbl: Failed to complete test\n",
			cpumask_pr_args(cpu_smt_mask(cpu)));
	}
}
/* Signature for chunk is bad, or scan test failed */ if (status.sbaf_status == SBAF_STATUS_SIGN_FAIL ||
status.sbaf_status == SBAF_STATUS_TEST_FAIL) returnfalse;
switch (err_code) { case IFS_SBAF_NO_ERROR: case IFS_SBAF_OTHER_THREAD_COULD_NOT_JOIN: case IFS_SBAF_INTERRUPTED_BEFORE_RENDEZVOUS: case IFS_SBAF_EXCEED_NUMBER_OF_THREADS_CONCURRENT: case IFS_SBAF_INTERRUPTED_DURING_EXECUTION: returntrue; case IFS_SBAF_UNASSIGNED_ERROR_CODE3: case IFS_SBAF_INVALID_BUNDLE_INDEX: case IFS_SBAF_MISMATCH_ARGS_BETWEEN_THREADS: case IFS_SBAF_CORE_NOT_CAPABLE_CURRENTLY: case IFS_SBAF_UNASSIGNED_ERROR_CODE7: case IFS_SBAF_INVALID_PROGRAM_INDEX: case IFS_SBAF_CORRUPTED_CHUNK: case IFS_SBAF_DID_NOT_START: break;
} returnfalse;
}
/*
 * Execute the SBAF test. Called "simultaneously" on all threads of a core
 * at high priority using the stop_cpus mechanism.
 *
 * @data: pointer to the struct sbaf_run_params shared by all threads of
 *        the core
 *
 * Always returns 0; the test outcome is passed back in run_params->status.
 */
static int dosbaf(void *data)
{
	struct sbaf_run_params *run_params = data;
	int cpu = smp_processor_id();
	union ifs_sbaf_status status;
	struct ifs_data *ifsd;
	int first;

	ifsd = run_params->ifsd;

	/* Only the first logical CPU on a core reports result */
	first = cpumask_first(cpu_smt_mask(cpu));
	wait_for_sibling_cpu(&sbaf_cpus_in, NSEC_PER_SEC);

	/*
	 * This WRMSR will wait for other HT threads to also write
	 * to this MSR (at most for activate.delay cycles). Then it
	 * starts scan of each requested bundle. The core test happens
	 * during the "execution" of the WRMSR.
	 */
	wrmsrq(MSR_ACTIVATE_SBAF, run_params->activate->data);
	rdmsrq(MSR_SBAF_STATUS, status.data);
	trace_ifs_sbaf(ifsd->cur_batch, *run_params->activate, status);

	/* Pass back the result of the test */
	if (cpu == first)
		run_params->status = status;

	return 0;
}
staticvoid ifs_sbaf_test_core(int cpu, struct device *dev)
{ struct sbaf_run_params run_params; union ifs_sbaf_status status = {}; union ifs_sbaf activate; unsignedlong timeout; struct ifs_data *ifsd; int stop_bundle; int retries;
/* * Initiate per core test. It wakes up work queue threads on the target cpu and * its sibling cpu. Once all sibling threads wake up, the scan test gets executed and * wait for all sibling threads to finish the scan test.
*/ int do_core_test(int cpu, struct device *dev)
{ conststruct ifs_test_caps *test = ifs_get_test_caps(dev); struct ifs_data *ifsd = ifs_get_data(dev); int ret = 0;
/* Prevent CPUs from being taken offline during the scan test */
cpus_read_lock();
if (!cpu_online(cpu)) {
dev_info(dev, "cannot test on the offline cpu %d\n", cpu);
ret = -EINVAL; goto out;
}
switch (test->test_num) { case IFS_TYPE_SAF: if (!ifsd->loaded)
ret = -EPERM; else
ifs_test_core(cpu, dev); break; case IFS_TYPE_ARRAY_BIST: if (ifsd->array_gen == ARRAY_GEN0)
ifs_array_test_core(cpu, dev); else
ifs_array_test_gen1(cpu, dev); break; case IFS_TYPE_SBAF: if (!ifsd->loaded)
ret = -EPERM; else
ifs_sbaf_test_core(cpu, dev); break; default:
ret = -EINVAL;
}
out:
cpus_read_unlock(); return ret;
}
/*
 * Web-page footer carried over from the source listing (translated):
 *
 * Measurement V0.5
 * Processing time: 0.1 seconds (preprocessed)
 *
 * The information on this website has been carefully compiled to the
 * best of our knowledge. However, no guarantee is given as to the
 * completeness, correctness, or quality of the information provided.
 *
 * Note: The coloured syntax highlighting and the measurement are still
 * experimental.
 */