/* * Clear the PE frozen state as it might be put into frozen state * in the last PCI remove path. It's not harmful to do so when the * PE is already in unfrozen state.
*/
rc = opal_pci_eeh_freeze_clear(phb->opal_id, pe_no,
OPAL_EEH_ACTION_CLEAR_FREEZE_ALL); if (rc != OPAL_SUCCESS && rc != OPAL_UNSUPPORTED)
pr_warn("%s: Error %lld unfreezing PHB#%x-PE#%x\n",
__func__, rc, phb->hose->global_number, pe_no);
return &phb->ioda.pe_array[pe_no];
}
/*
 * Reserve PE number @pe_no on @phb so the allocator will not hand it
 * out again, then (re-)initialize the PE. Reserving an already-set
 * bit is harmless and only logged at debug level.
 */
static void pnv_ioda_reserve_pe(struct pnv_phb *phb, int pe_no)
{
	/* Reject PE numbers outside this PHB's PE space */
	if (pe_no < 0 || pe_no >= phb->ioda.total_pe_num) {
		pr_warn("%s: Invalid PE %x on PHB#%x\n",
			__func__, pe_no, phb->hose->global_number);
		return;
	}

	mutex_lock(&phb->ioda.pe_alloc_mutex);
	if (test_and_set_bit(pe_no, phb->ioda.pe_alloc))
		pr_debug("%s: PE %x was reserved on PHB#%x\n",
			 __func__, pe_no, phb->hose->global_number);
	mutex_unlock(&phb->ioda.pe_alloc_mutex);

	pnv_ioda_init_pe(phb, pe_no);
}
struct pnv_ioda_pe *pnv_ioda_alloc_pe(struct pnv_phb *phb, int count)
{ struct pnv_ioda_pe *ret = NULL; int run = 0, pe, i;
mutex_lock(&phb->ioda.pe_alloc_mutex);
/* scan backwards for a run of @count cleared bits */ for (pe = phb->ioda.total_pe_num - 1; pe >= 0; pe--) { if (test_bit(pe, phb->ioda.pe_alloc)) {
run = 0; continue;
}
run++; if (run == count) break;
} if (run != count) goto out;
for (i = pe; i < pe + count; i++) {
set_bit(i, phb->ioda.pe_alloc);
pnv_ioda_init_pe(phb, i);
}
ret = &phb->ioda.pe_array[pe];
/* The default M64 BAR is shared by all PEs */ staticint pnv_ioda2_init_m64(struct pnv_phb *phb)
{ constchar *desc; struct resource *r;
s64 rc;
/* Configure the default M64 BAR */
rc = opal_pci_set_phb_mem_window(phb->opal_id,
OPAL_M64_WINDOW_TYPE,
phb->ioda.m64_bar_idx,
phb->ioda.m64_base,
0, /* unused */
phb->ioda.m64_size); if (rc != OPAL_SUCCESS) {
desc = "configuring"; goto fail;
}
/* Enable the default M64 BAR */
rc = opal_pci_phb_mmio_enable(phb->opal_id,
OPAL_M64_WINDOW_TYPE,
phb->ioda.m64_bar_idx,
OPAL_ENABLE_M64_SPLIT); if (rc != OPAL_SUCCESS) {
desc = "enabling"; goto fail;
}
/* * Exclude the segments for reserved and root bus PE, which * are first or last two PEs.
*/
r = &phb->hose->mem_resources[1]; if (phb->ioda.reserved_pe_idx == 0)
r->start += (2 * phb->ioda.m64_segsize); elseif (phb->ioda.reserved_pe_idx == (phb->ioda.total_pe_num - 1))
r->end -= (2 * phb->ioda.m64_segsize); else
pr_warn(" Cannot strip M64 segment for reserved PE#%x\n",
phb->ioda.reserved_pe_idx);
base = phb->ioda.m64_base;
sgsz = phb->ioda.m64_segsize; for (i = 0; i <= PCI_ROM_RESOURCE; i++) {
r = &pdev->resource[i]; if (!r->parent || !pnv_pci_is_m64(phb, r)) continue;
start = ALIGN_DOWN(r->start - base, sgsz);
end = ALIGN(r->end - base, sgsz); for (segno = start / sgsz; segno < end / sgsz; segno++) { if (pe_bitmap)
set_bit(segno, pe_bitmap); else
pnv_ioda_reserve_pe(phb, segno);
}
}
}
/* Root bus shouldn't use M64 */ if (pci_is_root_bus(bus)) return NULL;
/* Allocate bitmap */
size = ALIGN(phb->ioda.total_pe_num / 8, sizeof(unsignedlong));
pe_alloc = kzalloc(size, GFP_KERNEL); if (!pe_alloc) {
pr_warn("%s: Out of memory !\n",
__func__); return NULL;
}
/* Figure out reserved PE numbers by the PE */
pnv_ioda_reserve_m64_pe(bus, pe_alloc, all);
/* * the current bus might not own M64 window and that's all * contributed by its child buses. For the case, we needn't * pick M64 dependent PE#.
*/ if (bitmap_empty(pe_alloc, phb->ioda.total_pe_num)) {
kfree(pe_alloc); return NULL;
}
/* * Figure out the master PE and put all slave PEs to master * PE's list to form compound PE.
*/
master_pe = NULL;
i = -1; while ((i = find_next_bit(pe_alloc, phb->ioda.total_pe_num, i + 1)) <
phb->ioda.total_pe_num) {
pe = &phb->ioda.pe_array[i];
if (phb->type != PNV_PHB_IODA2) {
pr_info(" Not support M64 window\n"); return;
}
if (!firmware_has_feature(FW_FEATURE_OPAL)) {
pr_info(" Firmware too old to support M64 window\n"); return;
}
r = of_get_property(dn, "ibm,opal-m64-window", NULL); if (!r) {
pr_info(" No on %pOF\n",
dn); return;
}
/* * Find the available M64 BAR range and pickup the last one for * covering the whole 64-bits space. We support only one range.
*/ if (of_property_read_u32_array(dn, "ibm,opal-available-m64-ranges",
m64_range, 2)) { /* In absence of the property, assume 0..15 */
m64_range[0] = 0;
m64_range[1] = 16;
} /* We only support 64 bits in our allocator */ if (m64_range[1] > 63) {
pr_warn("%s: Limiting M64 range to 63 (from %d) on PHB#%x\n",
__func__, m64_range[1], phb->hose->global_number);
m64_range[1] = 63;
} /* Empty range, no m64 */ if (m64_range[1] <= m64_range[0]) {
pr_warn("%s: M64 empty, disabling M64 usage on PHB#%x\n",
__func__, phb->hose->global_number); return;
}
/* This lines up nicely with the display from processing OF ranges */
pr_info(" MEM 0x%016llx..0x%016llx -> 0x%016llx (M64 #%d..%d)\n",
res->start, res->end, pci_addr, m64_range[0],
m64_range[0] + m64_range[1] - 1);
/* Mark all M64 used up by default */
phb->ioda.m64_bar_alloc = (unsignedlong)-1;
/* Use last M64 BAR to cover M64 window */
m64_range[1]--;
phb->ioda.m64_bar_idx = m64_range[0] + m64_range[1];
pr_info(" Using M64 #%d as default window\n", phb->ioda.m64_bar_idx);
/* Mark remaining ones free */ for (i = m64_range[0]; i < m64_range[1]; i++)
clear_bit(i, &phb->ioda.m64_bar_alloc);
/* * Setup init functions for M64 based on IODA version, IODA3 uses * the IODA2 code.
*/
phb->init_m64 = pnv_ioda2_init_m64;
}
/* Sanity check on PE number */ if (pe_no < 0 || pe_no >= phb->ioda.total_pe_num) return OPAL_EEH_STOPPED_PERM_UNAVAIL;
/* * Fetch the master PE and the PE instance might be * not initialized yet.
*/
pe = &phb->ioda.pe_array[pe_no]; if (pe->flags & PNV_IODA_PE_SLAVE) {
pe = pe->master;
WARN_ON(!pe || !(pe->flags & PNV_IODA_PE_MASTER));
pe_no = pe->pe_number;
}
/* * Clear PE frozen state. If it's master PE, we need * clear slave PE frozen state as well.
*/ if (is_add) {
opal_pci_eeh_freeze_clear(phb->opal_id, pe->pe_number,
OPAL_EEH_ACTION_CLEAR_FREEZE_ALL); if (pe->flags & PNV_IODA_PE_MASTER) {
list_for_each_entry(slave, &pe->slaves, list)
opal_pci_eeh_freeze_clear(phb->opal_id,
slave->pe_number,
OPAL_EEH_ACTION_CLEAR_FREEZE_ALL);
}
}
/* * Associate PE in PELT. We need add the PE into the * corresponding PELT-V as well. Otherwise, the error * originated from the PE might contribute to other * PEs.
*/
ret = pnv_ioda_set_one_peltv(phb, pe, pe, is_add); if (ret) return ret;
/* For compound PEs, any one affects all of them */ if (pe->flags & PNV_IODA_PE_MASTER) {
list_for_each_entry(slave, &pe->slaves, list) {
ret = pnv_ioda_set_one_peltv(phb, slave, pe, is_add); if (ret) return ret;
}
}
while (parent) { struct pci_dn *pdn = pci_get_pdn(parent);
if (pdn && pdn->pe_number != IODA_INVALID_PE) {
rc = opal_pci_set_peltv(phb->opal_id, pdn->pe_number,
pe->pe_number,
OPAL_REMOVE_PE_FROM_DOMAIN); /* XXX What to do in case of error ? */
}
parent = parent->bus->self;
}
switch(count) { case 1: bcomp = OpalPciBusAll; break; case 2: bcomp = OpalPciBus7Bits; break; case 4: bcomp = OpalPciBus6Bits; break; case 8: bcomp = OpalPciBus5Bits; break; case 16: bcomp = OpalPciBus4Bits; break; case 32: bcomp = OpalPciBus3Bits; break; default:
dev_err(&pe->pbus->dev, "Number of subordinate buses %d unsupported\n",
count); /* Do an exact match only */
bcomp = OpalPciBusAll;
}
rid_end = pe->rid + (count << 8);
} else {
bcomp = OpalPciBusAll;
dcomp = OPAL_COMPARE_RID_DEVICE_NUMBER;
fcomp = OPAL_COMPARE_RID_FUNCTION_NUMBER;
rid_end = pe->rid + 1;
}
/* * Associate PE in PELT. We need add the PE into the * corresponding PELT-V as well. Otherwise, the error * originated from the PE might contribute to other * PEs.
*/
rc = opal_pci_set_pe(phb->opal_id, pe->pe_number, pe->rid,
bcomp, dcomp, fcomp, OPAL_MAP_PE); if (rc) {
pe_err(pe, "OPAL error %ld trying to setup PELT table\n", rc); return -ENXIO;
}
/* * Configure PELTV. NPUs don't have a PELTV table so skip * configuration on them.
*/ if (phb->type != PNV_PHB_NPU_OCAPI)
pnv_ioda_set_peltv(phb, pe, true);
if (!pdn) {
pr_err("%s: Device tree node not associated properly\n",
pci_name(dev)); return NULL;
} if (pdn->pe_number != IODA_INVALID_PE) return NULL;
pe = pnv_ioda_alloc_pe(phb, 1); if (!pe) {
pr_warn("%s: Not enough PE# available, disabling device\n",
pci_name(dev)); return NULL;
}
/* NOTE: We don't get a reference for the pointer in the PE * data structure, both the device and PE structures should be * destroyed at the same time. * * At some point we want to remove the PDN completely anyways
*/
pdn->pe_number = pe->pe_number;
pe->flags = PNV_IODA_PE_DEV;
pe->pdev = dev;
pe->pbus = NULL;
pe->mve_number = -1;
pe->rid = dev->bus->number << 8 | pdn->devfn;
pe->device_count++;
pe_info(pe, "Associated device to PE\n");
if (pnv_ioda_configure_pe(phb, pe)) { /* XXX What do we do here ? */
pnv_ioda_free_pe(pe);
pdn->pe_number = IODA_INVALID_PE;
pe->pdev = NULL; return NULL;
}
/* Put PE to the list */
mutex_lock(&phb->ioda.pe_list_mutex);
list_add_tail(&pe->list, &phb->ioda.pe_list);
mutex_unlock(&phb->ioda.pe_list_mutex); return pe;
}
/* * There are 2 types of PCI bus sensitive PEs: One that is comprised of a * single PCI bus. Another one that contains the primary PCI bus and its * subordinate PCI devices and buses. The second type of PE is normally * originated by PCIe-to-PCI bridge or PLX switch downstream ports.
*/ staticstruct pnv_ioda_pe *pnv_ioda_setup_bus_PE(struct pci_bus *bus, bool all)
{ struct pnv_phb *phb = pci_bus_to_pnvhb(bus); struct pnv_ioda_pe *pe = NULL; unsignedint pe_num;
/* * In partial hotplug case, the PE instance might be still alive. * We should reuse it instead of allocating a new one.
*/
pe_num = phb->ioda.pe_rmap[bus->number << 8]; if (WARN_ON(pe_num != IODA_INVALID_PE)) {
pe = &phb->ioda.pe_array[pe_num]; return NULL;
}
/* PE number for root bus should have been reserved */ if (pci_is_root_bus(bus))
pe = &phb->ioda.pe_array[phb->ioda.root_pe_idx];
/* Check if PE is determined by M64 */ if (!pe)
pe = pnv_ioda_pick_m64_pe(bus, all);
/* The PE number isn't pinned by M64 */ if (!pe)
pe = pnv_ioda_alloc_pe(phb, 1);
if (!pe) {
pr_warn("%s: Not enough PE# available for PCI bus %04x:%02x\n",
__func__, pci_domain_nr(bus), bus->number); return NULL;
}
if (all)
pe_info(pe, "Secondary bus %pad..%pad associated with PE#%x\n",
&bus->busn_res.start, &bus->busn_res.end,
pe->pe_number); else
pe_info(pe, "Secondary bus %pad associated with PE#%x\n",
&bus->busn_res.start, pe->pe_number);
if (pnv_ioda_configure_pe(phb, pe)) { /* XXX What do we do here ? */
pnv_ioda_free_pe(pe);
pe->pbus = NULL; return NULL;
}
/* Put PE to the list */
list_add_tail(&pe->list, &phb->ioda.pe_list);
/* Check if the BDFN for this device is associated with a PE yet */
pe = pnv_pci_bdfn_to_pe(phb, pci_dev_id(pdev)); if (!pe) { /* VF PEs should be pre-configured in pnv_pci_sriov_enable() */ if (WARN_ON(pdev->is_virtfn)) return;
pnv_pci_configure_bus(pdev->bus);
pe = pnv_pci_bdfn_to_pe(phb, pci_dev_id(pdev));
pci_info(pdev, "Configured PE#%x\n", pe ? pe->pe_number : 0xfffff);
/* * If we can't setup the IODA PE something has gone horribly * wrong and we can't enable DMA for the device.
*/ if (WARN_ON(!pe)) return;
} else {
pci_info(pdev, "Added to existing PE#%x\n", pe->pe_number);
}
/* * We assume that bridges *probably* don't need to do any DMA so we can * skip allocating a TCE table, etc unless we get a non-bridge device.
*/ if (!pe->dma_setup_done && !pci_is_bridge(pdev)) { switch (phb->type) { case PNV_PHB_IODA2:
pnv_pci_ioda2_setup_dma_pe(phb, pe); break; default:
pr_warn("%s: No DMA for PHB#%x (type %d)\n",
__func__, phb->hose->global_number, phb->type);
}
}
if (pdn)
pdn->pe_number = pe->pe_number;
pe->device_count++;
/* PEs with a DMA weight of zero won't have a group */ if (pe->table_group.group)
iommu_add_device(&pe->table_group, &pdev->dev);
}
/* * Reconfigure TVE#0 to be usable as 64-bit DMA space. * * The first 4GB of virtual memory for a PE is reserved for 32-bit accesses. * Devices can only access more than that if bit 59 of the PCI address is set * by hardware, which indicates TVE#1 should be used instead of TVE#0. * Many PCI devices are not capable of addressing that many bits, and as a * result are limited to the 4GB of virtual memory made available to 32-bit * devices in TVE#0. * * In order to work around this, reconfigure TVE#0 to be suitable for 64-bit * devices by configuring the virtual memory past the first 4GB inaccessible * by 64-bit DMAs. This should only be used by devices that want more than * 4GB, and only on PEs that have no 32-bit devices. * * Currently this will only work on PHB3 (POWER8).
*/ staticint pnv_pci_ioda_dma_64bit_bypass(struct pnv_ioda_pe *pe)
{
u64 window_size, table_size, tce_count, addr; struct page *table_pages;
u64 tce_order = 28; /* 256MB TCEs */
__be64 *tces;
s64 rc;
/* * Window size needs to be a power of two, but needs to account for * shifting memory by the 4GB offset required to skip 32bit space.
*/
window_size = roundup_pow_of_two(memory_hotplug_max() + (1ULL << 32));
tce_count = window_size >> tce_order;
table_size = tce_count << 3;
if (table_size < PAGE_SIZE)
table_size = PAGE_SIZE;
table_pages = alloc_pages_node(pe->phb->hose->node, GFP_KERNEL,
get_order(table_size)); if (!table_pages) goto err;
tces = page_address(table_pages); if (!tces) goto err;
if (WARN_ON(!pdn || pdn->pe_number == IODA_INVALID_PE)) returnfalse;
pe = &phb->ioda.pe_array[pdn->pe_number]; if (pe->tce_bypass_enabled) {
u64 top = pe->tce_bypass_base + memblock_end_of_DRAM() - 1; if (dma_mask >= top) returntrue;
}
/* * If the device can't set the TCE bypass bit but still wants * to access 4GB or more, on PHB3 we can reconfigure TVE#0 to * bypass the 32-bit region and be usable for 64-bit DMAs. * The device needs to be able to address all of this space.
*/ if (dma_mask >> 32 &&
dma_mask > (memory_hotplug_max() + (1ULL << 32)) && /* pe->pdev should be set if it's a single device, pe->pbus if not */
(pe->device_count == 1 || !pe->pbus) &&
phb->model == PNV_PHB_MODEL_PHB3) { /* Configure the bypass mode */
s64 rc = pnv_pci_ioda_dma_64bit_bypass(pe); if (rc) returnfalse; /* 4GB offset bypasses 32-bit space */
pdev->dev.archdata.dma_offset = (1ULL << 32); returntrue;
}
/* * Map TCE table through TVT. The TVE index is the PE number * shifted by 1 bit for 32-bits DMA space.
*/
rc = opal_pci_map_pe_dma_window(phb->opal_id,
pe->pe_number,
(pe->pe_number << 1) + num,
tbl->it_indirect_levels + 1,
__pa(tbl->it_base),
size << 3,
IOMMU_PAGE_SIZE(tbl)); if (rc) {
pe_err(pe, "Failed to configure TCE table, err %lld\n", rc); return rc;
}
/* * crashkernel= specifies the kdump kernel's maximum memory at * some offset and there is no guaranteed the result is a power * of 2, which will cause errors later.
*/ const u64 max_memory = __rounddown_pow_of_two(memory_hotplug_max());
/* * In memory constrained environments, e.g. kdump kernel, the * DMA window can be larger than available memory, which will * cause errors later.
*/ const u64 maxblock = 1UL << (PAGE_SHIFT + MAX_PAGE_ORDER);
/* * We create the default window as big as we can. The constraint is * the max order of allocation possible. The TCE table is likely to * end up being multilevel and with on-demand allocation in place, * the initial use is not going to be huge as the default window aims * to support crippled devices (i.e. not fully 64bit DMAble) only.
*/ /* iommu_table::it_map uses 1 bit per IOMMU page, hence 8 */ const u64 window_size = min((maxblock * 8) << PAGE_SHIFT, max_memory); /* Each TCE level cannot exceed maxblock so go multilevel if needed */ unsignedlong tces_order = ilog2(window_size >> PAGE_SHIFT); unsignedlong tcelevel_order = ilog2(maxblock >> 3); unsignedint levels = tces_order / tcelevel_order;
if (tces_order % tcelevel_order)
levels += 1; /* * We try to stick to default levels (which is >1 at the moment) in * order to save memory by relying on on-demain TCE level allocation.
*/
levels = max_t(unsignedint, levels, POWERNV_IOMMU_DEFAULT_LEVELS);
/* We use top part of 32bit space for MMIO so exclude it from DMA */
res_start = 0;
res_end = 0; if (window_size > pe->phb->ioda.m32_pci_base) {
res_start = pe->phb->ioda.m32_pci_base >> tbl->it_page_shift;
res_end = min(window_size, SZ_4G) >> tbl->it_page_shift;
}
tbl->it_index = (pe->phb->hose->global_number << 16) | pe->pe_number; if (iommu_init_table(tbl, pe->phb->hose->node, res_start, res_end))
rc = pnv_pci_ioda2_set_window(&pe->table_group, 0, tbl); else
rc = -ENOMEM; if (rc) {
pe_err(pe, "Failed to configure 32-bit TCE table, err %ld\n", rc);
iommu_tce_table_put(tbl);
tbl = NULL; /* This clears iommu_table_base below */
} if (!pnv_iommu_bypass_disabled)
pnv_pci_ioda2_set_bypass(pe, true);
/* * Set table base for the case of IOMMU DMA use. Usually this is done * from dma_dev_setup() which is not called when a device is returned * from VFIO so do it here.
*/ if (pe->pdev)
set_iommu_table_base(&pe->pdev->dev, tbl);
/* See the comment about iommu_ops above */ if (pe->table_group.tables[0]) return;
pnv_pci_ioda2_setup_default_config(pe); if (pe->pbus)
pnv_ioda_setup_bus_dma(pe, pe->pbus);
}
/* * Called from KVM in real mode to EOI passthru interrupts. The ICP * EOI is handled directly in KVM in kvmppc_deliver_irq_passthru(). * * The IRQ data is mapped in the PCI-MSI domain and the EOI OPAL call * needs an HW IRQ number mapped in the XICS IRQ domain. The HW IRQ * numbers of the in-the-middle MSI domain are vector numbers and it's * good enough for OPAL. Use that.
*/
int64_t pnv_opal_pci_msi_eoi(struct irq_data *d)
{ struct pci_controller *hose = irq_data_get_irq_chip_data(d->parent_data); struct pnv_phb *phb = hose->private_data;
/*
 * The msi_free() op is called before irq_domain_free_irqs_top() when
 * the handler data is still available. Use that to clear the XIVE
 * controller.
 */
static void pnv_msi_ops_msi_free(struct irq_domain *domain,
				 struct msi_domain_info *info,
				 unsigned int irq)
{
	if (!xive_enabled())
		return;

	xive_irq_free_data(irq);
}
/* * The IRQ data is mapped in the MSI domain in which HW IRQ numbers * correspond to vector numbers.
*/ staticvoid pnv_msi_eoi(struct irq_data *d)
{ struct pci_controller *hose = irq_data_get_irq_chip_data(d); struct pnv_phb *phb = hose->private_data;
if (phb->model == PNV_PHB_MODEL_PHB3) { /* * The EOI OPAL call takes an OPAL HW IRQ number but * since it is translated into a vector number in * OPAL, use that directly.
*/
WARN_ON_ONCE(opal_pci_msi_eoi(phb->opal_id, d->hwirq));
}
/*
 * Map the I/O and MMIO segments of @pe's resources. This is called on
 * PEs from top to bottom, so segments assigned to a parent PE can be
 * overridden by its child PEs when necessary.
 */
static void pnv_ioda_setup_pe_seg(struct pnv_ioda_pe *pe)
{
	struct pci_dev *pdev;
	int i;

	/*
	 * Only PCI bus based PEs are handled for now. PCI device based
	 * PEs, for example SRIOV sensitive VFs, are to be figured out
	 * later.
	 */
	BUG_ON(!(pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL)));

	list_for_each_entry(pdev, &pe->pbus->devices, bus_list) {
		for (i = 0; i <= PCI_ROM_RESOURCE; i++)
			pnv_ioda_setup_pe_res(pe, &pdev->resource[i]);

		/*
		 * When the PE spans all subordinate buses, the windows
		 * of the child bridges are mapped to the PE as well.
		 */
		if ((pe->flags & PNV_IODA_PE_BUS_ALL) && pci_is_bridge(pdev)) {
			for (i = 0; i < PCI_BRIDGE_RESOURCE_NUM; i++)
				pnv_ioda_setup_pe_res(pe,
					&pdev->resource[PCI_BRIDGE_RESOURCES + i]);
		}
	}
}
/* Retrieve the diag data from firmware */
ret = opal_pci_get_phb_diag_data2(phb->opal_id, phb->diag_data,
phb->diag_data_size); if (ret != OPAL_SUCCESS) return -EIO;
/* Print the diag data to the kernel log */
pnv_pci_dump_phb_diag_data(phb->hose, phb->diag_data); return 0;
}
/* Empty bus ? bail */ if (list_empty(&bus->devices)) return;
/* * If there's a bridge associated with that bus enable it. This works * around races in the generic code if the enabling is done during * parallel probing. This can be removed once those races have been * fixed.
*/ if (dev) { int rc = pci_enable_device(dev); if (rc)
pci_err(dev, "Error enabling bridge (%d)\n", rc);
pci_set_master(dev);
}
/* Perform the same to child busses */
list_for_each_entry(child, &bus->children, node)
pnv_pci_enable_bridge(child);
}
/* * Returns the alignment for I/O or memory windows for P2P * bridges. That actually depends on how PEs are segmented. * For now, we return I/O or M32 segment size for PE sensitive * P2P bridges. Otherwise, the default values (4KiB for I/O, * 1MiB for memory) will be returned. * * The current PCI bus might be put into one PE, which was * create against the parent PCI bridge. For that case, we * needn't enlarge the alignment so that we can save some * resources.
*/ static resource_size_t pnv_pci_window_alignment(struct pci_bus *bus, unsignedlong type)
{ struct pnv_phb *phb = pci_bus_to_pnvhb(bus); int num_pci_bridges = 0; struct pci_dev *bridge;
bridge = bus->self; while (bridge) { if (pci_pcie_type(bridge) == PCI_EXP_TYPE_PCI_BRIDGE) {
num_pci_bridges++; if (num_pci_bridges >= 2) return 1;
}
bridge = bridge->bus->self;
}
/* * We fall back to M32 if M64 isn't supported. We enforce the M64 * alignment for any 64-bit resource, PCIe doesn't care and * bridges only do 64-bit prefetchable anyway.
*/ if (phb->ioda.m64_segsize && pnv_pci_is_m64_flags(type)) return phb->ioda.m64_segsize; if (type & IORESOURCE_MEM) return phb->ioda.m32_segsize;
return phb->ioda.io_segsize;
}
/* * We are updating root port or the upstream port of the * bridge behind the root port with PHB's windows in order * to accommodate the changes on required resources during * PCI (slot) hotplug, which is connected to either root * port or the downstream ports of PCIe switch behind the * root port.
*/ staticvoid pnv_pci_fixup_bridge_resources(struct pci_bus *bus, unsignedlong type)
{ struct pci_controller *hose = pci_bus_to_host(bus); struct pnv_phb *phb = hose->private_data; struct pci_dev *bridge = bus->self; struct resource *r, *w; bool msi_region = false; int i;
/* Check if we need apply fixup to the bridge's windows */ if (!pci_is_root_bus(bridge->bus) &&
!pci_is_root_bus(bridge->bus->self->bus)) return;
/* Fixup the resources */ for (i = 0; i < PCI_BRIDGE_RESOURCE_NUM; i++) {
r = &bridge->resource[PCI_BRIDGE_RESOURCES + i]; if (!r->flags || !r->parent) continue;
w = NULL; if (r->flags & type & IORESOURCE_IO)
w = &hose->io_resource; elseif (pnv_pci_is_m64(phb, r) &&
(type & IORESOURCE_PREFETCH) &&
phb->ioda.m64_segsize)
w = &hose->mem_resources[1]; elseif (r->flags & type & IORESOURCE_MEM) {
w = &hose->mem_resources[0];
msi_region = true;
}
r->start = w->start;
r->end = w->end;
/* The 64KB 32-bits MSI region shouldn't be included in * the 32-bits bridge window. Otherwise, we can see strange * issues. One of them is EEH error observed on Garrison. * * Exclude top 1MB region which is the minimal alignment of * 32-bits bridge window.
*/ if (msi_region) {
r->end += 0x10000;
r->end -= 0x100000;
}
}
}
/* Don't assign PE to PCI bus, which doesn't have subordinate devices */ if (WARN_ON(list_empty(&bus->devices))) return;
/* Reserve PEs according to used M64 resources */
pnv_ioda_reserve_m64_pe(bus, NULL, all);
/* * Assign PE. We might run here because of partial hotplug. * For the case, we just pick up the existing PE and should * not allocate resources again.
*/
pe = pnv_ioda_setup_bus_PE(bus, all); if (!pe) return;
/* Prevent enabling devices for which we couldn't properly * assign a PE
*/ staticbool pnv_pci_enable_device_hook(struct pci_dev *dev)
{ struct pci_dn *pdn;
pdn = pci_get_pdn(dev); if (!pdn || pdn->pe_number == IODA_INVALID_PE) {
pci_err(dev, "pci_enable_device() blocked, no PE assigned.\n"); returnfalse;
}
/* Release slave PEs in the compound PE */ if (pe->flags & PNV_IODA_PE_MASTER) {
list_for_each_entry_safe(slave, tmp, &pe->slaves, list) {
list_del(&slave->list);
pnv_ioda_free_pe(slave);
}
}
/* * The PE for root bus can be removed because of hotplug in EEH * recovery for fenced PHB error. We need to mark the PE dead so * that it can be populated again in PCI hot add path. The PE * shouldn't be destroyed as it's the global reserved resource.
*/ if (phb->ioda.root_pe_idx == pe->pe_number) return;
/* The VF PE state is torn down when sriov_disable() is called */ if (pdev->is_virtfn) return;
if (!pdn || pdn->pe_number == IODA_INVALID_PE) return;
#ifdef CONFIG_PCI_IOV /* * FIXME: Try move this to sriov_disable(). It's here since we allocate * the iov state at probe time since we need to fiddle with the IOV * resources.
*/ if (pdev->is_physfn)
kfree(pdev->dev.archdata.iov_data); #endif
/* * PCI hotplug can happen as part of EEH error recovery. The @pdn * isn't removed and added afterwards in this scenario. We should * set the PE number in @pdn to an invalid one. Otherwise, the PE's * device count is decreased on removing devices while failing to * be increased on adding devices. It leads to unbalanced PE's device * count and eventually make normal PCI hotplug path broken.
*/
pe = &phb->ioda.pe_array[pdn->pe_number];
pdn->pe_number = IODA_INVALID_PE;
WARN_ON(--pe->device_count < 0); if (pe->device_count == 0)
pnv_ioda_release_pe(pe);
}
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit noch Richtigkeit
noch Qualität der bereitgestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.