kernel-ark/arch/i386/pci/mmconfig.c

225 lines
5.5 KiB
C
Raw Normal View History

/*
* Copyright (C) 2004 Matthew Wilcox <matthew@wil.cx>
* Copyright (C) 2004 Intel Corp.
*
* This code is released under the GNU General Public License version 2.
*/
/*
* mmconfig.c - Low-level direct PCI config space access via MMCONFIG
*/
#include <linux/pci.h>
#include <linux/init.h>
#include <linux/acpi.h>
#include <asm/e820.h>
#include "pci.h"
[PATCH] PCI: fix issues with extended conf space when MMCONFIG disabled because of e820 On 15 Jun 2006 03:45:10 +0200, Andi Kleen wrote: > Anyways I would say that if the BIOS can't get MCFG right then > it's likely not been validated on that board and shouldn't be used. According to Petr Vandrovec: ... "What is important (and checked) is address of MMCONFIG reported by MCFG table... Unfortunately code does not bother with printing that address :-( "Another problem is that code has hardcoded that MMCONFIG area is 256MB large. Unfortunately for the code PCI specification allows any power of two between 2MB and 256MB if vendor knows that such amount of busses (from 2 to 128) will be sufficient for system. With notebook it is quite possible that not full 8 bits are implemented for MMCONFIG bus number." So here is a patch. Unfortunately my system still fails the test because it doesn't reserve any part of the MMCONFIG area, but this may fix others. Booted on x86_64, only compiled on i386. x86_64 still remaps the max area (256MB) even though only 2MB is checked... but 2.6.16 had no check at all so it is still better. PCI: reduce size of x86 MMCONFIG reserved area check 1. Print the address of the MMCONFIG area when the test for that area being reserved fails. 2. Only check if the first 2MB is reserved, as that is the minimum. Signed-off-by: Chuck Ebbert <76306.1226@compuserve.com> Acked-by: Arjan van de Ven <arjan@linux.intel.com> Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
2006-06-15 08:41:52 +00:00
/* aperture is up to 256MB but BIOS may reserve less */
#define MMCONFIG_APER_MIN (2 * 1024*1024)
#define MMCONFIG_APER_MAX (256 * 1024*1024)
/* Assume systems with more busses have correct MCFG */
#define MAX_CHECK_BUS 16
#define mmcfg_virt_addr ((void __iomem *) fix_to_virt(FIX_PCIE_MCFG))
/* The base address of the last MMCONFIG device accessed */
static u32 mmcfg_last_accessed_device;
[PATCH] arch/i386/pci/mmconfig.c tlb flush fix We use the fixmap for accessing pci config space in pci_mmcfg_read/write(). The problem is in pci_exp_set_dev_base(). It is caching a last accessed address to avoid calling set_fixmap_nocache() whenever pci_mmcfg_read/write() is used. static inline void pci_exp_set_dev_base(int bus, int devfn) { u32 dev_base = base | (bus << 20) | (devfn << 12); if (dev_base != mmcfg_last_accessed_device) { mmcfg_last_accessed_device = dev_base; set_fixmap_nocache(FIX_PCIE_MCFG, dev_base); } } cpu0 cpu1 --------------------------------------------------------------------------- pci_mmcfg_read("device-A") pci_exp_set_dev_base() set_fixmap_nocache() pci_mmcfg_read("device-B") pci_exp_set_dev_base() set_fixmap_nocache() pci_mmcfg_read("device-B") pci_exp_set_dev_base() /* doesn't flush tlb */ But if cpus accessed the above order, the second pci_mmcfg_read() on cpu0 doesn't flush the TLB, because "mmcfg_last_accessed_device" is device-B. So, second pci_mmcfg_read() on cpu0 accesses a device-A via a previous TLB cache. This problem became the cause of several strange behavior. This patches fixes this situation by adds "mmcfg_last_accessed_cpu" check. [ Alternatively, we could make a per-cpu mapping area or something. Not that it's probably worth it, but if we wanted to avoid all locking and instead just disable preemption, that would be the way to go. --Linus ] Signed-off-by: OGAWA Hirofumi <hogawa@miraclelinux.com> Signed-off-by: OGAWA Hirofumi <hirofumi@mail.parknet.co.jp> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2006-12-23 01:00:43 +00:00
static int mmcfg_last_accessed_cpu;
static DECLARE_BITMAP(fallback_slots, MAX_CHECK_BUS*32);
/*
* Functions for accessing PCI configuration space with MMCONFIG accesses
*/
static u32 get_base_addr(unsigned int seg, int bus, unsigned devfn)
{
int cfg_num = -1;
struct acpi_mcfg_allocation *cfg;
if (seg == 0 && bus < MAX_CHECK_BUS &&
test_bit(PCI_SLOT(devfn) + 32*bus, fallback_slots))
return 0;
while (1) {
++cfg_num;
if (cfg_num >= pci_mmcfg_config_num) {
break;
}
cfg = &pci_mmcfg_config[cfg_num];
if (cfg->pci_segment != seg)
continue;
if ((cfg->start_bus_number <= bus) &&
(cfg->end_bus_number >= bus))
return cfg->address;
}
/* Handle more broken MCFG tables on Asus etc.
They only contain a single entry for bus 0-0. Assume
this applies to all busses. */
cfg = &pci_mmcfg_config[0];
if (pci_mmcfg_config_num == 1 &&
cfg->pci_segment == 0 &&
(cfg->start_bus_number | cfg->end_bus_number) == 0)
return cfg->address;
/* Fall back to type 0 */
return 0;
}
/*
* This is always called under pci_config_lock
*/
static void pci_exp_set_dev_base(unsigned int base, int bus, int devfn)
{
u32 dev_base = base | (bus << 20) | (devfn << 12);
[PATCH] arch/i386/pci/mmconfig.c tlb flush fix We use the fixmap for accessing pci config space in pci_mmcfg_read/write(). The problem is in pci_exp_set_dev_base(). It is caching a last accessed address to avoid calling set_fixmap_nocache() whenever pci_mmcfg_read/write() is used. static inline void pci_exp_set_dev_base(int bus, int devfn) { u32 dev_base = base | (bus << 20) | (devfn << 12); if (dev_base != mmcfg_last_accessed_device) { mmcfg_last_accessed_device = dev_base; set_fixmap_nocache(FIX_PCIE_MCFG, dev_base); } } cpu0 cpu1 --------------------------------------------------------------------------- pci_mmcfg_read("device-A") pci_exp_set_dev_base() set_fixmap_nocache() pci_mmcfg_read("device-B") pci_exp_set_dev_base() set_fixmap_nocache() pci_mmcfg_read("device-B") pci_exp_set_dev_base() /* doesn't flush tlb */ But if cpus accessed the above order, the second pci_mmcfg_read() on cpu0 doesn't flush the TLB, because "mmcfg_last_accessed_device" is device-B. So, second pci_mmcfg_read() on cpu0 accesses a device-A via a previous TLB cache. This problem became the cause of several strange behavior. This patches fixes this situation by adds "mmcfg_last_accessed_cpu" check. [ Alternatively, we could make a per-cpu mapping area or something. Not that it's probably worth it, but if we wanted to avoid all locking and instead just disable preemption, that would be the way to go. --Linus ] Signed-off-by: OGAWA Hirofumi <hogawa@miraclelinux.com> Signed-off-by: OGAWA Hirofumi <hirofumi@mail.parknet.co.jp> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2006-12-23 01:00:43 +00:00
int cpu = smp_processor_id();
if (dev_base != mmcfg_last_accessed_device ||
cpu != mmcfg_last_accessed_cpu) {
mmcfg_last_accessed_device = dev_base;
[PATCH] arch/i386/pci/mmconfig.c tlb flush fix We use the fixmap for accessing pci config space in pci_mmcfg_read/write(). The problem is in pci_exp_set_dev_base(). It is caching a last accessed address to avoid calling set_fixmap_nocache() whenever pci_mmcfg_read/write() is used. static inline void pci_exp_set_dev_base(int bus, int devfn) { u32 dev_base = base | (bus << 20) | (devfn << 12); if (dev_base != mmcfg_last_accessed_device) { mmcfg_last_accessed_device = dev_base; set_fixmap_nocache(FIX_PCIE_MCFG, dev_base); } } cpu0 cpu1 --------------------------------------------------------------------------- pci_mmcfg_read("device-A") pci_exp_set_dev_base() set_fixmap_nocache() pci_mmcfg_read("device-B") pci_exp_set_dev_base() set_fixmap_nocache() pci_mmcfg_read("device-B") pci_exp_set_dev_base() /* doesn't flush tlb */ But if cpus accessed the above order, the second pci_mmcfg_read() on cpu0 doesn't flush the TLB, because "mmcfg_last_accessed_device" is device-B. So, second pci_mmcfg_read() on cpu0 accesses a device-A via a previous TLB cache. This problem became the cause of several strange behavior. This patches fixes this situation by adds "mmcfg_last_accessed_cpu" check. [ Alternatively, we could make a per-cpu mapping area or something. Not that it's probably worth it, but if we wanted to avoid all locking and instead just disable preemption, that would be the way to go. --Linus ] Signed-off-by: OGAWA Hirofumi <hogawa@miraclelinux.com> Signed-off-by: OGAWA Hirofumi <hirofumi@mail.parknet.co.jp> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2006-12-23 01:00:43 +00:00
mmcfg_last_accessed_cpu = cpu;
set_fixmap_nocache(FIX_PCIE_MCFG, dev_base);
}
}
static int pci_mmcfg_read(unsigned int seg, unsigned int bus,
unsigned int devfn, int reg, int len, u32 *value)
{
unsigned long flags;
u32 base;
if ((bus > 255) || (devfn > 255) || (reg > 4095)) {
*value = -1;
return -EINVAL;
}
base = get_base_addr(seg, bus, devfn);
if (!base)
return pci_conf1_read(seg,bus,devfn,reg,len,value);
spin_lock_irqsave(&pci_config_lock, flags);
pci_exp_set_dev_base(base, bus, devfn);
switch (len) {
case 1:
*value = readb(mmcfg_virt_addr + reg);
break;
case 2:
*value = readw(mmcfg_virt_addr + reg);
break;
case 4:
*value = readl(mmcfg_virt_addr + reg);
break;
}
spin_unlock_irqrestore(&pci_config_lock, flags);
return 0;
}
static int pci_mmcfg_write(unsigned int seg, unsigned int bus,
unsigned int devfn, int reg, int len, u32 value)
{
unsigned long flags;
u32 base;
if ((bus > 255) || (devfn > 255) || (reg > 4095))
return -EINVAL;
base = get_base_addr(seg, bus, devfn);
if (!base)
return pci_conf1_write(seg,bus,devfn,reg,len,value);
spin_lock_irqsave(&pci_config_lock, flags);
pci_exp_set_dev_base(base, bus, devfn);
switch (len) {
case 1:
writeb(value, mmcfg_virt_addr + reg);
break;
case 2:
writew(value, mmcfg_virt_addr + reg);
break;
case 4:
writel(value, mmcfg_virt_addr + reg);
break;
}
spin_unlock_irqrestore(&pci_config_lock, flags);
return 0;
}
static struct pci_raw_ops pci_mmcfg = {
.read = pci_mmcfg_read,
.write = pci_mmcfg_write,
};
/* K8 systems have some devices (typically in the builtin northbridge)
that are only accessible using type1
Normally this can be expressed in the MCFG by not listing them
and assigning suitable _SEGs, but this isn't implemented in some BIOS.
Instead try to discover all devices on bus 0 that are unreachable using MM
and fallback for them. */
static __init void unreachable_devices(void)
{
int i, k;
unsigned long flags;
for (k = 0; k < MAX_CHECK_BUS; k++) {
for (i = 0; i < 32; i++) {
u32 val1;
u32 addr;
pci_conf1_read(0, k, PCI_DEVFN(i, 0), 0, 4, &val1);
if (val1 == 0xffffffff)
continue;
/* Locking probably not needed, but safer */
spin_lock_irqsave(&pci_config_lock, flags);
addr = get_base_addr(0, k, PCI_DEVFN(i, 0));
if (addr != 0)
pci_exp_set_dev_base(addr, k, PCI_DEVFN(i, 0));
if (addr == 0 ||
readl((u32 __iomem *)mmcfg_virt_addr) != val1) {
set_bit(i + 32*k, fallback_slots);
printk(KERN_NOTICE
"PCI: No mmconfig possible on %x:%x\n", k, i);
}
spin_unlock_irqrestore(&pci_config_lock, flags);
}
}
}
void __init pci_mmcfg_init(int type)
{
if ((pci_probe & PCI_PROBE_MMCONF) == 0)
return;
acpi_table_parse(ACPI_SIG_MCFG, acpi_parse_mcfg);
if ((pci_mmcfg_config_num == 0) ||
(pci_mmcfg_config == NULL) ||
(pci_mmcfg_config[0].address == 0))
return;
/* Only do this check when type 1 works. If it doesn't work
assume we run on a Mac and always use MCFG */
if (type == 1 && !e820_all_mapped(pci_mmcfg_config[0].address,
pci_mmcfg_config[0].address + MMCONFIG_APER_MIN,
E820_RESERVED)) {
printk(KERN_ERR "PCI: BIOS Bug: MCFG area at %lx is not E820-reserved\n",
(unsigned long)pci_mmcfg_config[0].address);
printk(KERN_ERR "PCI: Not using MMCONFIG.\n");
return;
}
printk(KERN_INFO "PCI: Using MMCONFIG\n");
raw_pci_ops = &pci_mmcfg;
pci_probe = (pci_probe & ~PCI_PROBE_MASK) | PCI_PROBE_MMCONF;
unreachable_devices();
}