// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * 64-bit pSeries and RS/6000 setup code.
 *
 * Copyright (C) 1995 Linus Torvalds
 * Adapted from 'alpha' version by Gary Thomas
 * Modified by Cort Dougan (cort@cs.nmt.edu)
 * Modified by PPC64 Team, IBM Corp
 */
	root = of_find_node_by_path("/");
	if (root)
		model = of_get_property(root, "model", NULL);
	seq_printf(m, "machine\t\t: CHRP %s\n", model);
	of_node_put(root);
	if (radix_enabled())
		seq_printf(m, "MMU\t\t: Radix\n");
	else
		seq_printf(m, "MMU\t\t: Hash\n");
}
/*
 * Initialize firmware assisted non-maskable interrupts if
 * the firmware supports this feature.
 */
static void __init fwnmi_init(void)
{
	unsigned long system_reset_addr, machine_check_addr;
	u8 *mce_data_buf;
	unsigned int i;
	int nr_cpus = num_possible_cpus();
#ifdef CONFIG_PPC_64S_HASH_MMU
	struct slb_entry *slb_ptr;
	size_t size;
#endif
	int ibm_nmi_register_token;

	ibm_nmi_register_token = rtas_function_token(RTAS_FN_IBM_NMI_REGISTER);
	if (ibm_nmi_register_token == RTAS_UNKNOWN_SERVICE)
		return;

	ibm_nmi_interlock_token = rtas_function_token(RTAS_FN_IBM_NMI_INTERLOCK);
	if (WARN_ON(ibm_nmi_interlock_token == RTAS_UNKNOWN_SERVICE))
		return;
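
	/*
	 * FWNMI needs both RTAS calls: ibm,nmi-register points firmware at
	 * our handlers, and ibm,nmi-interlock is how those handlers tell
	 * RTAS they are done so the next NMI may be delivered.
	 */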
	/*
	 * If the kernel's not linked at zero we point the firmware at low
	 * addresses anyway, and use a trampoline to get to the real code.
	 */
system_reset_addr = __pa(system_reset_fwnmi) - PHYSICAL_START;
machine_check_addr = __pa(machine_check_fwnmi) - PHYSICAL_START;
	/*
	 * Allocate a chunk for per cpu buffer to hold rtas errorlog.
	 * It will be used in real mode mce handler, hence it needs to be
	 * below RMA.
	 */
	mce_data_buf = memblock_alloc_try_nid_raw(RTAS_ERROR_LOG_MAX * nr_cpus,
					RTAS_ERROR_LOG_MAX, MEMBLOCK_LOW_LIMIT,
					ppc64_rma_size, NUMA_NO_NODE);
	if (!mce_data_buf)
		panic("Failed to allocate %d bytes below %pa for MCE buffer\n",
		      RTAS_ERROR_LOG_MAX * nr_cpus, &ppc64_rma_size);

	for_each_possible_cpu(i) {
		paca_ptrs[i]->mce_data_buf = mce_data_buf +
						(RTAS_ERROR_LOG_MAX * i);
	}
#ifdef CONFIG_PPC_64S_HASH_MMU
	if (!radix_enabled()) {
		/* Allocate per cpu area to save old slb contents during MCE */
		size = sizeof(struct slb_entry) * mmu_slb_size * nr_cpus;
		slb_ptr = memblock_alloc_try_nid_raw(size,
				sizeof(struct slb_entry), MEMBLOCK_LOW_LIMIT,
				ppc64_rma_size, NUMA_NO_NODE);
		if (!slb_ptr)
			panic("Failed to allocate %zu bytes below %pa for slb area\n",
			      size, &ppc64_rma_size);

		for_each_possible_cpu(i)
			paca_ptrs[i]->mce_faulty_slbs = slb_ptr + (mmu_slb_size * i);
	}
#endif

	if (rtas_call(ibm_nmi_register_token, 2, 1, NULL,
		      system_reset_addr, machine_check_addr))
		printk(KERN_ERR "FWNMI: nmi-register failed\n");
}
/*
 * Affix a device for the first timer to the platform bus if
 * we have firmware support for the H_WATCHDOG hypercall.
 */
static int __init pseries_wdt_init(void)
{
	if (firmware_has_feature(FW_FEATURE_WATCHDOG))
		platform_device_register_simple("pseries-wdt", 0, NULL, 0);
	return 0;
}
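
/*
 * subsys level runs after the platform bus exists but before ordinary
 * device initcalls, so the pseries-wdt platform driver can bind to the
 * device as soon as it is registered.
 */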
machine_subsys_initcall(pseries, pseries_wdt_init);
	for_each_node_by_type(np, "interrupt-controller") {
		if (of_device_is_compatible(np, "chrp,iic")) {
			found = np;
			break;
}
}
if (found == NULL) {
printk(KERN_DEBUG "pic: no ISA interrupt controller\n"); return;
}
	cascade = irq_of_parse_and_map(found, 0);
	if (!cascade) {
		printk(KERN_ERR "pic: failed to map cascade interrupt\n");
		return;
}
pr_debug("pic: cascade mapped to irq %d\n", cascade);
	for (old = of_node_get(found); old != NULL; old = np) {
		np = of_get_parent(old);
		of_node_put(old);
		if (np == NULL)
			break;
		if (!of_node_name_eq(np, "pci"))
			continue;
		addrp = of_get_property(np, "8259-interrupt-acknowledge", NULL);
		if (addrp == NULL)
			continue;
		naddr = of_n_addr_cells(np);
		intack = addrp[naddr-1];
		if (naddr > 1)
			intack |= ((unsigned long)addrp[naddr-2]) << 32;
	}
	if (intack)
printk(KERN_DEBUG "pic: PCI 8259 intack at 0x%016lx\n", intack);
i8259_init(found, intack);
of_node_put(found);
irq_set_chained_handler(cascade, pseries_8259_cascade);
}
static void __init pseries_init_irq(void)
{
	/* Try using a XIVE if available, otherwise use a XICS */
	if (!xive_spapr_init()) {
xics_init();
pseries_setup_i8259_cascade();
}
}
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
/*
 * Allocate space for the dispatch trace log for all possible cpus
 * and register the buffers with the hypervisor. This is used for
 * computing time stolen by the hypervisor.
 */
static int alloc_dispatch_logs(void)
{
	if (!firmware_has_feature(FW_FEATURE_SPLPAR))
		return 0;

	if (!dtl_cache)
		return 0;

	alloc_dtl_buffers(0);

	/* Register the DTL for the current (boot) cpu */
	register_dtl_buffer(smp_processor_id());

	return 0;
}
machine_early_initcall(pseries, alloc_dispatch_logs);
#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
DEFINE_PER_CPU(u64, idle_spurr_cycles);
DEFINE_PER_CPU(u64, idle_entry_purr_snap);
DEFINE_PER_CPU(u64, idle_entry_spurr_snap);

static void pseries_lpar_idle(void)
{
	/*
	 * Default handler to go into low thread priority and possibly
	 * low power mode by ceding processor to hypervisor
	 */
	if (!prep_irq_for_idle())
		return;

	/* Indicate to hypervisor that we are idle. */
	pseries_idle_prolog();

	/*
	 * Yield the processor to the hypervisor. We return if
	 * an external interrupt occurs (which are driven prior
	 * to returning here) or if a prod occurs from another
	 * processor. When returning here, external interrupts
	 * are enabled.
	 */
	cede_processor();

	pseries_idle_epilog();
}
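
/*
 * H_LONG_BUSY return codes from an hcall encode a suggested retry
 * interval; get_longbusy_msecs() extracts it so the retry loops below
 * know how long to wait before calling again.
 */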
/*
 * Enable relocation on during exceptions. This has partition wide scope and
 * may take a while to complete, if it takes longer than one second we will
 * just give up rather than wasting any more time on this - if that turns out
 * to ever be a problem in practice we can move this into a kernel thread to
 * finish off the process later in boot.
 */
bool pseries_enable_reloc_on_exc(void)
{
	long rc;
	unsigned int delay, total_delay = 0;

	while (1) {
		rc = enable_reloc_on_exceptions();
		if (!H_IS_LONG_BUSY(rc)) {
			if (rc == H_P2) {
				pr_info("Relocation on exceptions not supported\n");
				return false;
			} else if (rc != H_SUCCESS) {
				pr_warn("Unable to enable relocation on exceptions: %ld\n", rc);
				return false;
			}
			pseries_reloc_on_exception_enabled = true;
			return true;
		}

		delay = get_longbusy_msecs(rc);
		total_delay += delay;
		if (total_delay > 1000) {
			pr_warn("Warning: Giving up waiting to enable relocation on exceptions (%u msec)!\n",
				total_delay);
			return false;
		}

		mdelay(delay);
	}
}
EXPORT_SYMBOL(pseries_enable_reloc_on_exc);
void pseries_disable_reloc_on_exc(void)
{
	long rc;

	while (1) {
		rc = disable_reloc_on_exceptions();
		if (!H_IS_LONG_BUSY(rc))
			break;
		mdelay(get_longbusy_msecs(rc));
	}
	if (rc == H_SUCCESS)
		pseries_reloc_on_exception_enabled = false;
	else
		pr_warn("Warning: Failed to disable relocation on exceptions: %ld\n",
			rc);
}
EXPORT_SYMBOL(pseries_disable_reloc_on_exc);
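
/*
 * The pseries kexec path disables relocation-on exceptions before
 * handing over, so the next kernel starts with exceptions delivered
 * in real mode as it expects at boot.
 */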
#ifdef __LITTLE_ENDIAN__
void pseries_big_endian_exceptions(void)
{
	long rc;

	while (1) {
		rc = enable_big_endian_exceptions();
		if (!H_IS_LONG_BUSY(rc))
			break;
mdelay(get_longbusy_msecs(rc));
}
	/*
	 * At this point it is unlikely panic() will get anything
	 * out to the user, since this is called very late in kexec
	 * but at least this will stop us from continuing on further
	 * and creating an even more difficult to debug situation.
	 *
	 * There is a known problem when kdump'ing, if cpus are offline
	 * the above call will fail. Rather than panicking again, keep
	 * going and hope the kdump kernel is also little endian, which
	 * it usually is.
	 */
	if (rc && !kdump_in_progress())
panic("Could not enable big endian exceptions");
}
void __init pseries_little_endian_exceptions(void)
{
	long rc;

	while (1) {
		rc = enable_little_endian_exceptions();
		if (!H_IS_LONG_BUSY(rc))
			break;
mdelay(get_longbusy_msecs(rc));
	}
	if (rc) {
ppc_md.progress("H_SET_MODE LE exception fail", 0);
panic("Could not enable little endian exceptions");
}
}
#endif
/* create pci_dn's for DT nodes under this PHB */
pci_devs_phb_init_dynamic(phb);
pseries_msi_allocate_domains(phb);
}
of_node_put(root);
	/*
	 * PCI_PROBE_ONLY and PCI_REASSIGN_ALL_BUS can be set via properties
	 * in chosen.
	 */
of_pci_check_probe_only();
}
static void init_cpu_char_feature_flags(struct h_cpu_char_result *result)
{
	/*
	 * The features below are disabled by default, so we instead look to see
	 * if firmware has *enabled* them, and set them if so.
	 */
	if (result->character & H_CPU_CHAR_SPEC_BAR_ORI31)
security_ftr_set(SEC_FTR_SPEC_BAR_ORI31);
if (result->character & H_CPU_CHAR_BCCTRL_SERIALISED)
security_ftr_set(SEC_FTR_BCCTRL_SERIALISED);
if (result->character & H_CPU_CHAR_L1D_FLUSH_ORI30)
security_ftr_set(SEC_FTR_L1D_FLUSH_ORI30);
if (result->character & H_CPU_CHAR_L1D_FLUSH_TRIG2)
security_ftr_set(SEC_FTR_L1D_FLUSH_TRIG2);
if (result->character & H_CPU_CHAR_L1D_THREAD_PRIV)
security_ftr_set(SEC_FTR_L1D_THREAD_PRIV);
if (result->character & H_CPU_CHAR_COUNT_CACHE_DISABLED)
security_ftr_set(SEC_FTR_COUNT_CACHE_DISABLED);
if (result->character & H_CPU_CHAR_BCCTR_FLUSH_ASSIST)
security_ftr_set(SEC_FTR_BCCTR_FLUSH_ASSIST);
if (result->character & H_CPU_CHAR_BCCTR_LINK_FLUSH_ASSIST)
security_ftr_set(SEC_FTR_BCCTR_LINK_FLUSH_ASSIST);
if (result->behaviour & H_CPU_BEHAV_FLUSH_COUNT_CACHE)
security_ftr_set(SEC_FTR_FLUSH_COUNT_CACHE);
if (result->behaviour & H_CPU_BEHAV_FLUSH_LINK_STACK)
security_ftr_set(SEC_FTR_FLUSH_LINK_STACK);
	/*
	 * The features below are enabled by default, so we instead look to see
	 * if firmware has *disabled* them, and clear them if so.
	 * H_CPU_BEHAV_FAVOUR_SECURITY_H could be set only if
	 * H_CPU_BEHAV_FAVOUR_SECURITY is.
	 */
	if (!(result->behaviour & H_CPU_BEHAV_FAVOUR_SECURITY)) {
security_ftr_clear(SEC_FTR_FAVOUR_SECURITY);
pseries_security_flavor = 0;
} elseif (result->behaviour & H_CPU_BEHAV_FAVOUR_SECURITY_H)
pseries_security_flavor = 1; else
pseries_security_flavor = 2;
if (!(result->behaviour & H_CPU_BEHAV_L1D_FLUSH_PR))
security_ftr_clear(SEC_FTR_L1D_FLUSH_PR);
if (result->behaviour & H_CPU_BEHAV_NO_L1D_FLUSH_ENTRY)
security_ftr_clear(SEC_FTR_L1D_FLUSH_ENTRY);
if (result->behaviour & H_CPU_BEHAV_NO_L1D_FLUSH_UACCESS)
security_ftr_clear(SEC_FTR_L1D_FLUSH_UACCESS);
if (result->behaviour & H_CPU_BEHAV_NO_STF_BARRIER)
security_ftr_clear(SEC_FTR_STF_BARRIER);
if (!(result->behaviour & H_CPU_BEHAV_BNDS_CHK_SPEC_BAR))
security_ftr_clear(SEC_FTR_BNDS_CHK_SPEC_BAR);
}
	/*
	 * Set features to the defaults assumed by init_cpu_char_feature_flags()
	 * so it can set/clear again any features that might have changed after
	 * migration, and in case the hypercall fails and it is not even called.
	 */
powerpc_security_features = SEC_FTR_DEFAULT;
	rc = plpar_get_cpu_characteristics(&result);
	if (rc == H_SUCCESS)
init_cpu_char_feature_flags(&result);
	/*
	 * We're the guest so this doesn't apply to us, clear it to simplify
	 * handling of it elsewhere.
	 */
security_ftr_clear(SEC_FTR_L1D_FLUSH_HV);
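
	/*
	 * A fallback (software displacement) flush is always available, so
	 * start from that and add the faster instruction-based variants if
	 * firmware advertised them.
	 */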
types = L1D_FLUSH_FALLBACK;
if (security_ftr_enabled(SEC_FTR_L1D_FLUSH_TRIG2))
types |= L1D_FLUSH_MTTRIG;
if (security_ftr_enabled(SEC_FTR_L1D_FLUSH_ORI30))
types |= L1D_FLUSH_ORI;
#ifdef CONFIG_PCI_IOV
enum rtas_iov_fw_value_map {
NUM_RES_PROPERTY = 0, /* Number of Resources */
LOW_INT = 1, /* Lowest 32 bits of Address */
START_OF_ENTRIES = 2, /* Always start of entry */
APERTURE_PROPERTY = 2, /* Start of entry+ to Aperture Size */
WDW_SIZE_PROPERTY = 4, /* Start of entry+ to Window Size */
NEXT_ENTRY = 7 /* Go to next entry on array */
};
enum get_iov_fw_value_index {
BAR_ADDRS = 1, /* Get Bar Address */
APERTURE_SIZE = 2, /* Get Aperture Size */
WDW_SIZE = 3 /* Get Window Size */
};
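
/*
 * Each BAR entry in "ibm,open-sriov-vf-bar-info" is NEXT_ENTRY (7) cells
 * wide; the offsets above select the 64-bit address, aperture and window
 * size fields within an entry.
 */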
static resource_size_t pseries_get_iov_fw_value(struct pci_dev *dev, int resno,
						enum get_iov_fw_value_index value)
{
	const int *indexes;
	struct device_node *dn = pci_device_to_OF_node(dev);
	int i, num_res, ret = 0;

	indexes = of_get_property(dn, "ibm,open-sriov-vf-bar-info", NULL);
	if (!indexes)
		return 0;
	/*
	 * First element in the array is the number of Bars
	 * returned. Search through the list to find the matching
	 * bar
	 */
	num_res = of_read_number(&indexes[NUM_RES_PROPERTY], 1);
	if (resno >= num_res)
		return 0; /* or an error */

	i = START_OF_ENTRIES + NEXT_ENTRY * resno;
	switch (value) {
	case BAR_ADDRS:
		ret = of_read_number(&indexes[i], 2);
		break;
	case APERTURE_SIZE:
		ret = of_read_number(&indexes[i + APERTURE_PROPERTY], 2);
		break;
	case WDW_SIZE:
		ret = of_read_number(&indexes[i + WDW_SIZE_PROPERTY], 2);
		break;
	}
return ret;
}
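
/*
 * For example, pseries_get_iov_fw_value(dev, 0, APERTURE_SIZE) returns
 * the firmware-reported aperture of the device's first VF BAR, or 0 if
 * the property is absent or the BAR index is out of range.
 */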
static void of_pci_set_vf_bar_size(struct pci_dev *dev, const int *indexes)
{
	struct resource *res;
	resource_size_t base, size;
	int i, r, num_res;

	num_res = of_read_number(&indexes[NUM_RES_PROPERTY], 1);
	num_res = min_t(int, num_res, PCI_SRIOV_NUM_BARS);
	for (i = START_OF_ENTRIES, r = 0; r < num_res && r < PCI_SRIOV_NUM_BARS;
	     i += NEXT_ENTRY, r++) {
res = &dev->resource[r + PCI_IOV_RESOURCES];
base = of_read_number(&indexes[i], 2);
size = of_read_number(&indexes[i + APERTURE_PROPERTY], 2);
res->flags = pci_parse_of_flags(of_read_number
(&indexes[i + LOW_INT], 1), 0);
res->flags |= (IORESOURCE_MEM_64 | IORESOURCE_PCI_FIXED);
res->name = pci_name(dev);
res->start = base;
res->end = base + size - 1;
}
}
static void of_pci_parse_iov_addrs(struct pci_dev *dev, const int *indexes)
{
	struct resource *res, *root, *conflict;
	resource_size_t base, size;
	int i, r, num_res;

	/*
	 * First element in the array is the number of Bars
	 * returned. Search through the list to find the matching
	 * bars and assign them from firmware into the resources structure.
	 */
	num_res = of_read_number(&indexes[NUM_RES_PROPERTY], 1);
	for (i = START_OF_ENTRIES, r = 0; r < num_res && r < PCI_SRIOV_NUM_BARS;
	     i += NEXT_ENTRY, r++) {
res = &dev->resource[r + PCI_IOV_RESOURCES];
base = of_read_number(&indexes[i], 2);
size = of_read_number(&indexes[i + WDW_SIZE_PROPERTY], 2);
res->name = pci_name(dev);
res->start = base;
res->end = base + size - 1;
root = &iomem_resource;
dev_dbg(&dev->dev, "pSeries IOV BAR %d: trying firmware assignment %pR\n",
r + PCI_IOV_RESOURCES, res);
		conflict = request_resource_conflict(root, res);
		if (conflict) {
dev_info(&dev->dev, "BAR %d: %pR conflicts with %s %pR\n",
r + PCI_IOV_RESOURCES, res,
conflict->name, conflict);
res->flags |= IORESOURCE_UNSET;
}
}
}
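
/*
 * A BAR left IORESOURCE_UNSET here is handed back to the PCI core,
 * which can try to reassign it later instead of silently using an
 * overlapping range.
 */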
static void pseries_disable_sriov_resources(struct pci_dev *pdev)
{
	int i;

	pci_warn(pdev, "No hypervisor support for SR-IOV on this device, IOV BARs disabled.\n");
	for (i = 0; i < PCI_SRIOV_NUM_BARS; i++)
pdev->resource[i + PCI_IOV_RESOURCES].flags = 0;
}
	/* Firmware must support open sriov otherwise don't configure */
	indexes = of_get_property(dn, "ibm,open-sriov-vf-bar-info", NULL);
	if (indexes)
		of_pci_set_vf_bar_size(pdev, indexes);
	else
pseries_disable_sriov_resources(pdev);
}
if (!pdev->is_physfn) return; /*Firmware must support open sriov otherwise don't configure*/
indexes = of_get_property(dn, "ibm,open-sriov-vf-bar-info", NULL); if (indexes)
of_pci_parse_iov_addrs(pdev, indexes); else
pseries_disable_sriov_resources(pdev);
}
	/* Firmware must support open sriov otherwise report regular alignment */
	reg = of_get_property(dn, "ibm,is-open-sriov-pf", NULL);
	if (!reg)
		return pci_iov_resource_size(pdev, resno);
/* Discover PIC type and setup ppc_md accordingly */
smp_init_pseries();
// Setup CPU hotplug callbacks
pseries_cpu_hotplug_init();
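
	/*
	 * A radix guest needs one of two ways to manage its translations:
	 * GTSE lets it issue tlbie itself, otherwise firmware must provide
	 * the H_RPT_INVALIDATE hcall to do the invalidations on its behalf.
	 */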
	if (radix_enabled() && !mmu_has_feature(MMU_FTR_GTSE))
		if (!firmware_has_feature(FW_FEATURE_RPT_INVALIDATE))
			panic("BUG: Radix support requires either GTSE or RPT_INVALIDATE\n");
	/* openpic global configuration register (64-bit format). */
	/* openpic Interrupt Source Unit pointer (64-bit format). */
	/* python0 facility area (mmio) (64-bit format) REAL address. */
/* init to some ~sane value until calibrate_delay() runs */
loops_per_jiffy = 50000000;
fwnmi_init();
	pseries_setup_security_mitigations();
	if (!radix_enabled())
pseries_lpar_read_hblkrm_characteristics();
/* By default, only probe PCI (can be overridden by rtas_pci) */
pci_add_flags(PCI_PROBE_ONLY);
/* Find and initialize PCI host bridges */
init_pci_config_tokens();
of_reconfig_notifier_register(&pci_dn_reconfig_nb);
pSeries_nvram_init();
if (firmware_has_feature(FW_FEATURE_LPAR)) {
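		/*
		 * Register the boot CPU's Virtual Processor Area with the
		 * hypervisor; secondary CPUs register theirs as they come
		 * online.
		 */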
vpa_init(boot_cpuid);
if (lppaca_shared_proc()) {
static_branch_enable(&shared_processor);
			pv_spinlocks_init();
#ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING
			static_key_slow_inc(&paravirt_steal_enabled);
			if (steal_acc)
				static_key_slow_inc(&paravirt_steal_rq_enabled);
#endif
}
static int pseries_set_xdabr(unsigned long dabr, unsigned long dabrx)
{
	/* Have to set at least one bit in the DABRX according to PAPR */
	if (dabrx == 0 && dabr == 0)
		dabrx = DABRX_USER;
	/* PAPR says we can only set kernel and user bits */
dabrx &= DABRX_KERNEL | DABRX_USER;
/**
 * pSeries_cmo_feature_init - FW_FEATURE_CMO is not stored in ibm,hypertas-functions,
 * handle that here. (Stolen from parse_system_parameter_string)
 */
static void __init pSeries_cmo_feature_init(void)
{
	static struct papr_sysparm_buf buf __initdata;
	static_assert(sizeof(buf.val) >= CMO_MAXLENGTH);
	char *ptr, *key, *value, *end;
	int page_order = IOMMU_PAGE_SHIFT_4K;
pr_debug(" -> fw_cmo_feature_init()\n");
if (papr_sysparm_get(PAPR_SYSPARM_COOP_MEM_OVERCOMMIT_ATTRS, &buf)) {
pr_debug("CMO not available\n");
pr_debug(" <- fw_cmo_feature_init()\n"); return;
}
end = &buf.val[CMO_MAXLENGTH];
ptr = &buf.val[0];
key = value = ptr;
	while (*ptr && (ptr <= end)) {
		/* Separate the key and value by replacing '=' with '\0' and
		 * point the value at the string after the '='
		 */
		if (ptr[0] == '=') {
ptr[0] = '\0';
value = ptr + 1;
		} else if (ptr[0] == '\0' || ptr[0] == ',') {
			/* Terminate the string containing the key/value pair */
ptr[0] = '\0';
			if (key == value) {
				pr_debug("Malformed key/value pair\n");
				/* Never found a '=', end processing */
				break;
}
	/* Page size is returned as the power of 2 of the page size,
	 * convert to the page size in bytes before returning
	 */
CMO_PageSize = 1 << page_order;
pr_debug("CMO_PageSize = %lu\n", CMO_PageSize);
/*
 * Early initialization. Relocation is on but do not reference unbolted pages
 */
static void __init pseries_init(void)
{
pr_debug(" -> pseries_init()\n");
pseries_add_hw_description();
#ifdef CONFIG_HVC_CONSOLE
	if (firmware_has_feature(FW_FEATURE_LPAR))
		hvc_vio_init_early();
#endif
	if (firmware_has_feature(FW_FEATURE_XDABR))
		ppc_md.set_dabr = pseries_set_xdabr;
	else if (firmware_has_feature(FW_FEATURE_DABR))
ppc_md.set_dabr = pseries_set_dabr;
if (firmware_has_feature(FW_FEATURE_SET_MODE))
ppc_md.set_dawr = pseries_set_dawr;
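
	/*
	 * DAWR supersedes DABR for hardware watchpoints on newer CPUs and,
	 * on an LPAR, is programmed through the H_SET_MODE interface, hence
	 * the FW_FEATURE_SET_MODE check above.
	 */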
/**
 * pseries_power_off - tell firmware about how to power off the system.
 *
 * This function calls either the power-off rtas token in normal cases
 * or the ibm,power-off-ups token (if present & requested) in case of
 * a power failure. If power-off token is used, power on will only be
 * possible with power button press. If ibm,power-off-ups token is used
 * it will allow auto poweron after power is restored.
 */
static void pseries_power_off(void)
{
	int rc;
	int rtas_poweroff_ups_token = rtas_function_token(RTAS_FN_IBM_POWER_OFF_UPS);
if (rtas_flash_term_hook)
rtas_flash_term_hook(SYS_POWER_OFF);
	/*
	 * Cell blades firmware claims to be chrp while it's not. Until this
	 * is fixed, we need to avoid those here.
	 */
	if (of_machine_is_compatible("IBM,CPBW-1.0") ||
	    of_machine_is_compatible("IBM,CBEA"))
		return 0;