kernel-ark/arch/x86/kernel/pci-nommu.c
FUJITA Tomonori 75f1cdf1dd x86: Handle HW IOMMU initialization failure gracefully
If HW IOMMU initialization fails (Intel VT-d often does this,
typically due to BIOS bugs), we fall back to nommu. It doesn't
work for the majority since nowadays we have more than 4GB
memory so we must use swiotlb instead of nommu.

The problem is that it's too late to initialize swiotlb when HW
IOMMU initialization fails. We need to allocate swiotlb memory
earlier from bootmem allocator. Chris explained the issue in
detail:

  http://marc.info/?l=linux-kernel&m=125657444317079&w=2

The current x86 IOMMU initialization sequence is too complicated
and handling the above issue makes it more hacky.

This patch changes x86 IOMMU initialization sequence to handle
the above issue cleanly.

The new x86 IOMMU initialization sequence are:

1. we initialize the swiotlb (and setting swiotlb to 1) in the case
   of (max_pfn > MAX_DMA32_PFN && !no_iommu). dma_ops is set to
   swiotlb_dma_ops or nommu_dma_ops. if swiotlb usage is forced by
   the boot option, we finish here.

2. we call the detection functions of all the IOMMUs

3. the detection function sets x86_init.iommu.iommu_init to the
   IOMMU initialization function (so we can avoid calling the
   initialization functions of all the IOMMUs needlessly).

4. if the IOMMU initialization function doesn't need to swiotlb
   then sets swiotlb to zero (e.g. the initialization is
   sucessful).

5. if we find that swiotlb is set to zero, we free swiotlb
   resource.

Signed-off-by: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Cc: chrisw@sous-sol.org
Cc: dwmw2@infradead.org
Cc: joerg.roedel@amd.com
Cc: muli@il.ibm.com
LKML-Reference: <1257849980-22640-10-git-send-email-fujita.tomonori@lab.ntt.co.jp>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-11-10 12:32:07 +01:00

106 lines
2.9 KiB
C

/* Fallback functions when the main IOMMU code is not compiled in. This
code is roughly equivalent to i386. */
#include <linux/dma-mapping.h>
#include <linux/scatterlist.h>
#include <linux/string.h>
#include <linux/init.h>
#include <linux/pci.h>
#include <linux/mm.h>
#include <asm/processor.h>
#include <asm/iommu.h>
#include <asm/dma.h>
static int
check_addr(char *name, struct device *hwdev, dma_addr_t bus, size_t size)
{
if (hwdev && !dma_capable(hwdev, bus, size)) {
if (*hwdev->dma_mask >= DMA_BIT_MASK(32))
printk(KERN_ERR
"nommu_%s: overflow %Lx+%zu of device mask %Lx\n",
name, (long long)bus, size,
(long long)*hwdev->dma_mask);
return 0;
}
return 1;
}
static dma_addr_t nommu_map_page(struct device *dev, struct page *page,
unsigned long offset, size_t size,
enum dma_data_direction dir,
struct dma_attrs *attrs)
{
dma_addr_t bus = page_to_phys(page) + offset;
WARN_ON(size == 0);
if (!check_addr("map_single", dev, bus, size))
return bad_dma_address;
flush_write_buffers();
return bus;
}
/* Map a set of buffers described by scatterlist in streaming
* mode for DMA. This is the scatter-gather version of the
* above pci_map_single interface. Here the scatter gather list
* elements are each tagged with the appropriate dma address
* and length. They are obtained via sg_dma_{address,length}(SG).
*
* NOTE: An implementation may be able to use a smaller number of
* DMA address/length pairs than there are SG table elements.
* (for example via virtual mapping capabilities)
* The routine returns the number of addr/length pairs actually
* used, at most nents.
*
* Device ownership issues as mentioned above for pci_map_single are
* the same here.
*/
static int nommu_map_sg(struct device *hwdev, struct scatterlist *sg,
int nents, enum dma_data_direction dir,
struct dma_attrs *attrs)
{
struct scatterlist *s;
int i;
WARN_ON(nents == 0 || sg[0].length == 0);
for_each_sg(sg, s, nents, i) {
BUG_ON(!sg_page(s));
s->dma_address = sg_phys(s);
if (!check_addr("map_sg", hwdev, s->dma_address, s->length))
return 0;
s->dma_length = s->length;
}
flush_write_buffers();
return nents;
}
static void nommu_free_coherent(struct device *dev, size_t size, void *vaddr,
dma_addr_t dma_addr)
{
free_pages((unsigned long)vaddr, get_order(size));
}
static void nommu_sync_single_for_device(struct device *dev,
dma_addr_t addr, size_t size,
enum dma_data_direction dir)
{
flush_write_buffers();
}
static void nommu_sync_sg_for_device(struct device *dev,
struct scatterlist *sg, int nelems,
enum dma_data_direction dir)
{
flush_write_buffers();
}
struct dma_map_ops nommu_dma_ops = {
.alloc_coherent = dma_generic_alloc_coherent,
.free_coherent = nommu_free_coherent,
.map_sg = nommu_map_sg,
.map_page = nommu_map_page,
.sync_single_for_device = nommu_sync_single_for_device,
.sync_sg_for_device = nommu_sync_sg_for_device,
.is_phys = 1,
};