diff --git a/arch/ia64/include/asm/xen/hypercall.h b/arch/ia64/include/asm/xen/hypercall.h
index 96fc623..ed28bcd 100644
--- a/arch/ia64/include/asm/xen/hypercall.h
+++ b/arch/ia64/include/asm/xen/hypercall.h
@@ -107,7 +107,7 @@ extern unsigned long __hypercall(unsigned long a1, unsigned long a2,
static inline int
xencomm_arch_hypercall_sched_op(int cmd, struct xencomm_handle *arg)
{
- return _hypercall2(int, sched_op_new, cmd, arg);
+ return _hypercall2(int, sched_op, cmd, arg);
}

static inline long
diff --git a/arch/ia64/xen/suspend.c b/arch/ia64/xen/suspend.c
index fd66b04..419c862 100644
--- a/arch/ia64/xen/suspend.c
+++ b/arch/ia64/xen/suspend.c
@@ -37,19 +37,14 @@ xen_mm_unpin_all(void)
/* nothing */
}

-void xen_pre_device_suspend(void)
-{
- /* nothing */
-}
-
void
-xen_pre_suspend()
+xen_arch_pre_suspend()
{
/* nothing */
}

void
-xen_post_suspend(int suspend_cancelled)
+xen_arch_post_suspend(int suspend_cancelled)
{
if (suspend_cancelled)
return;
diff --git a/arch/x86/include/asm/xen/hypercall.h b/arch/x86/include/asm/xen/hypercall.h
index a3c28ae..8508bfe 100644
--- a/arch/x86/include/asm/xen/hypercall.h
+++ b/arch/x86/include/asm/xen/hypercall.h
@@ -287,7 +287,7 @@ HYPERVISOR_fpu_taskswitch(int set)
static inline int
HYPERVISOR_sched_op(int cmd, void *arg)
{
- return _hypercall2(int, sched_op_new, cmd, arg);
+ return _hypercall2(int, sched_op, cmd, arg);
}

static inline long
@@ -422,10 +422,17 @@ HYPERVISOR_set_segment_base(int reg, unsigned long value)
#endif

static inline int
-HYPERVISOR_suspend(unsigned long srec)
+HYPERVISOR_suspend(unsigned long start_info_mfn)
{
- return _hypercall3(int, sched_op, SCHEDOP_shutdown,
- SHUTDOWN_suspend, srec);
+ struct sched_shutdown r = { .reason = SHUTDOWN_suspend };
+
+ /*
+ * For a PV guest the tools require that the start_info mfn be
+ * present in rdx/edx when the hypercall is made. Per the
+ * hypercall calling convention this is the third hypercall
+ * argument, which is start_info_mfn here.
+ */
+ return _hypercall3(int, sched_op, SCHEDOP_shutdown, &r, start_info_mfn);
}

static inline int
diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h
index f25bdf2..64a619d 100644
--- a/arch/x86/include/asm/xen/page.h
+++ b/arch/x86/include/asm/xen/page.h
@@ -29,8 +29,10 @@ typedef struct xpaddr {

/**** MACHINE <-> PHYSICAL CONVERSION MACROS ****/
#define INVALID_P2M_ENTRY (~0UL)
-#define FOREIGN_FRAME_BIT (1UL<<31)
+#define FOREIGN_FRAME_BIT (1UL<<(BITS_PER_LONG-1))
+#define IDENTITY_FRAME_BIT (1UL<<(BITS_PER_LONG-2))
#define FOREIGN_FRAME(m) ((m) | FOREIGN_FRAME_BIT)
+#define IDENTITY_FRAME(m) ((m) | IDENTITY_FRAME_BIT)

/* Maximum amount of memory we can handle in a domain in pages */
#define MAX_DOMAIN_PAGES \
@@ -41,12 +43,19 @@ extern unsigned int machine_to_phys_order;

extern unsigned long get_phys_to_machine(unsigned long pfn);
extern bool set_phys_to_machine(unsigned long pfn, unsigned long mfn);
+extern bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn);
+extern unsigned long set_phys_range_identity(unsigned long pfn_s,
+ unsigned long pfn_e);

-extern int m2p_add_override(unsigned long mfn, struct page *page);
-extern int m2p_remove_override(struct page *page);
+extern int m2p_add_override(unsigned long mfn, struct page *page,
+ bool clear_pte);
+extern int m2p_remove_override(struct page *page, bool clear_pte);
extern struct page *m2p_find_override(unsigned long mfn);
extern unsigned long m2p_find_override_pfn(unsigned long mfn, unsigned long pfn);

+#ifdef CONFIG_XEN_DEBUG_FS
+extern int p2m_dump_show(struct seq_file *m, void *v);
+#endif
static inline unsigned long pfn_to_mfn(unsigned long pfn)
{
unsigned long mfn;
@@ -57,7 +66,7 @@ static inline unsigned long pfn_to_mfn(unsigned long pfn)
mfn = get_phys_to_machine(pfn);

if (mfn != INVALID_P2M_ENTRY)
- mfn &= ~FOREIGN_FRAME_BIT;
+ mfn &= ~(FOREIGN_FRAME_BIT | IDENTITY_FRAME_BIT);

return mfn;
}
@@ -73,25 +82,44 @@ static inline int phys_to_machine_mapping_valid(unsigned long pfn)
static inline unsigned long mfn_to_pfn(unsigned long mfn)
{
unsigned long pfn;
+ int ret = 0;

if (xen_feature(XENFEAT_auto_translated_physmap))
return mfn;

+ if (unlikely((mfn >> machine_to_phys_order) != 0)) {
+ pfn = ~0;
+ goto try_override;
+ }
pfn = 0;
/*
* The array access can fail (e.g., device space beyond end of RAM).
* In such cases it doesn't matter what we return (we return garbage),
* but we must handle the fault without crashing!
*/
- __get_user(pfn, &machine_to_phys_mapping[mfn]);
-
- /*
- * If this appears to be a foreign mfn (because the pfn
- * doesn't map back to the mfn), then check the local override
- * table to see if there's a better pfn to use.
+ ret = __get_user(pfn, &machine_to_phys_mapping[mfn]);
+try_override:
+ /* ret might be < 0 if there are no entries in the m2p for mfn */
+ if (ret < 0)
+ pfn = ~0;
+ else if (get_phys_to_machine(pfn) != mfn)
+ /*
+ * If this appears to be a foreign mfn (because the pfn
+ * doesn't map back to the mfn), then check the local override
+ * table to see if there's a better pfn to use.
+ *
+ * m2p_find_override_pfn returns ~0 if it doesn't find anything.
+ */
+ pfn = m2p_find_override_pfn(mfn, ~0);
+
+ /*
+ * pfn is ~0 if there are no entries in the m2p for mfn or if the
+ * entry doesn't map back to the mfn and m2p_override doesn't have a
+ * valid entry for it.
*/
- if (get_phys_to_machine(pfn) != mfn)
- pfn = m2p_find_override_pfn(mfn, pfn);
+ if (pfn == ~0 &&
+ get_phys_to_machine(mfn) == IDENTITY_FRAME(mfn))
+ pfn = mfn;

return pfn;
}
diff --git a/arch/x86/include/asm/xen/pci.h b/arch/x86/include/asm/xen/pci.h
index 2329b3e..4fbda9a 100644
--- a/arch/x86/include/asm/xen/pci.h
+++ b/arch/x86/include/asm/xen/pci.h
@@ -15,10 +15,26 @@ static inline int pci_xen_hvm_init(void)
#endif
#if defined(CONFIG_XEN_DOM0)
void __init xen_setup_pirqs(void);
+int xen_find_device_domain_owner(struct pci_dev *dev);
+int xen_register_device_domain_owner(struct pci_dev *dev, uint16_t domain);
+int xen_unregister_device_domain_owner(struct pci_dev *dev);
#else
static inline void __init xen_setup_pirqs(void)
{
}
+static inline int xen_find_device_domain_owner(struct pci_dev *dev)
+{
+ return -1;
+}
+static inline int xen_register_device_domain_owner(struct pci_dev *dev,
+ uint16_t domain)
+{
+ return -1;
+}
+static inline int xen_unregister_device_domain_owner(struct pci_dev *dev)
+{
+ return -1;
+}
#endif

#if defined(CONFIG_PCI_MSI)
@@ -27,16 +43,16 @@ static inline void __init xen_setup_pirqs(void)
* its own functions.
*/
struct xen_pci_frontend_ops {
- int (*enable_msi)(struct pci_dev *dev, int **vectors);
+ int (*enable_msi)(struct pci_dev *dev, int vectors[]);
void (*disable_msi)(struct pci_dev *dev);
- int (*enable_msix)(struct pci_dev *dev, int **vectors, int nvec);
+ int (*enable_msix)(struct pci_dev *dev, int vectors[], int nvec);
void (*disable_msix)(struct pci_dev *dev);
};

extern struct xen_pci_frontend_ops *xen_pci_frontend;

static inline int xen_pci_frontend_enable_msi(struct pci_dev *dev,
- int **vectors)
+ int vectors[])
{
if (xen_pci_frontend && xen_pci_frontend->enable_msi)
return xen_pci_frontend->enable_msi(dev, vectors);
@@ -48,7 +64,7 @@ static inline void xen_pci_frontend_disable_msi(struct pci_dev *dev)
xen_pci_frontend->disable_msi(dev);
}
static inline int xen_pci_frontend_enable_msix(struct pci_dev *dev,
- int **vectors, int nvec)
+ int vectors[], int nvec)
{
if (xen_pci_frontend && xen_pci_frontend->enable_msix)
return xen_pci_frontend->enable_msix(dev, vectors, nvec);
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index 947f42a..66637bd 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -283,6 +283,8 @@ unsigned long __init_refok init_memory_mapping(unsigned long start,
if (!after_bootmem && !start) {
pud_t *pud;
pmd_t *pmd;
+ unsigned long addr;
+ u64 size, memblock_addr;

mmu_cr4_features = read_cr4();

@@ -291,11 +293,18 @@ unsigned long __init_refok init_memory_mapping(unsigned long start,
* located on different 2M pages. cleanup_highmap(), however,
* can only consider _end when it runs, so destroy any
* mappings beyond _brk_end here.
+ * Respect memblock reserved regions.
*/
pud = pud_offset(pgd_offset_k(_brk_end), _brk_end);
pmd = pmd_offset(pud, _brk_end - 1);
- while (++pmd <= pmd_offset(pud, (unsigned long)_end - 1))
- pmd_clear(pmd);
+ addr = (_brk_end + PMD_SIZE - 1) & PMD_MASK;
+ while (++pmd <= pmd_offset(pud, (unsigned long)_end - 1)) {
+ memblock_addr = memblock_x86_find_in_range_size(__pa(addr),
+ &size, PMD_SIZE);
+ if (memblock_addr == (u64) __pa(addr) && size >= PMD_SIZE)
+ pmd_clear(pmd);
+ addr += PMD_SIZE;
+ }
}
#endif
__flush_tlb_all();
diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c
index 25cd4a0..309c0a0 100644
--- a/arch/x86/pci/xen.c
+++ b/arch/x86/pci/xen.c
@@ -20,7 +20,8 @@
#include <asm/xen/pci.h>

#ifdef CONFIG_ACPI
-static int xen_hvm_register_pirq(u32 gsi, int triggering)
+static int acpi_register_gsi_xen_hvm(struct device *dev, u32 gsi,
+ int trigger, int polarity)
{
int rc, irq;
struct physdev_map_pirq map_irq;
@@ -41,7 +42,7 @@ static int xen_hvm_register_pirq(u32 gsi, int triggering)
return -1;
}

- if (triggering == ACPI_EDGE_SENSITIVE) {
+ if (trigger == ACPI_EDGE_SENSITIVE) {
shareable = 0;
name = "ioapic-edge";
} else {
@@ -55,12 +56,6 @@ static int xen_hvm_register_pirq(u32 gsi, int triggering)

return irq;
}
-
-static int acpi_register_gsi_xen_hvm(struct device *dev, u32 gsi,
- int trigger, int polarity)
-{
- return xen_hvm_register_pirq(gsi, trigger);
-}
#endif

#if defined(CONFIG_PCI_MSI)
@@ -91,7 +86,7 @@ static void xen_msi_compose_msg(struct pci_dev *pdev, unsigned int pirq,

static int xen_hvm_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
{
- int irq, pirq, ret = 0;
+ int irq, pirq;
struct msi_desc *msidesc;
struct msi_msg msg;

@@ -99,39 +94,33 @@ static int xen_hvm_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
__read_msi_msg(msidesc, &msg);
pirq = MSI_ADDR_EXT_DEST_ID(msg.address_hi) |
((msg.address_lo >> MSI_ADDR_DEST_ID_SHIFT) & 0xff);
- if (xen_irq_from_pirq(pirq) >= 0 && msg.data == XEN_PIRQ_MSI_DATA) {
- xen_allocate_pirq_msi((type == PCI_CAP_ID_MSIX) ?
- "msi-x" : "msi", &irq, &pirq, XEN_ALLOC_IRQ);
- if (irq < 0)
+ if (msg.data != XEN_PIRQ_MSI_DATA ||
+ xen_irq_from_pirq(pirq) < 0) {
+ pirq = xen_allocate_pirq_msi(dev, msidesc);
+ if (pirq < 0)
goto error;
- ret = set_irq_msi(irq, msidesc);
- if (ret < 0)
- goto error_while;
- printk(KERN_DEBUG "xen: msi already setup: msi --> irq=%d"
- " pirq=%d\n", irq, pirq);
- return 0;
+ xen_msi_compose_msg(dev, pirq, &msg);
+ __write_msi_msg(msidesc, &msg);
+ dev_dbg(&dev->dev, "xen: msi bound to pirq=%d\n", pirq);
+ } else {
+ dev_dbg(&dev->dev,
+ "xen: msi already bound to pirq=%d\n", pirq);
}
- xen_allocate_pirq_msi((type == PCI_CAP_ID_MSIX) ?
- "msi-x" : "msi", &irq, &pirq, (XEN_ALLOC_IRQ | XEN_ALLOC_PIRQ));
- if (irq < 0 || pirq < 0)
+ irq = xen_bind_pirq_msi_to_irq(dev, msidesc, pirq, 0,
+ (type == PCI_CAP_ID_MSIX) ?
+ "msi-x" : "msi",
+ DOMID_SELF);
+ if (irq < 0)
goto error;
- printk(KERN_DEBUG "xen: msi --> irq=%d, pirq=%d\n", irq, pirq);
- xen_msi_compose_msg(dev, pirq, &msg);
- ret = set_irq_msi(irq, msidesc);
- if (ret < 0)
- goto error_while;
- write_msi_msg(irq, &msg);
+ dev_dbg(&dev->dev,
+ "xen: msi --> pirq=%d --> irq=%d\n", pirq, irq);
}
return 0;

-error_while:
- unbind_from_irqhandler(irq, NULL);
error:
- if (ret == -ENODEV)
- dev_err(&dev->dev, "Xen PCI frontend has not registered" \
- " MSI/MSI-X support!\n");
-
- return ret;
+ dev_err(&dev->dev,
+ "Xen PCI frontend has not registered MSI/MSI-X support!\n");
+ return -ENODEV;
}

/*
@@ -150,35 +139,27 @@ static int xen_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
return -ENOMEM;

if (type == PCI_CAP_ID_MSIX)
- ret = xen_pci_frontend_enable_msix(dev, &v, nvec);
+ ret = xen_pci_frontend_enable_msix(dev, v, nvec);
else
- ret = xen_pci_frontend_enable_msi(dev, &v);
+ ret = xen_pci_frontend_enable_msi(dev, v);
if (ret)
goto error;
i = 0;
list_for_each_entry(msidesc, &dev->msi_list, list) {
- irq = xen_allocate_pirq(v[i], 0, /* not sharable */
- (type == PCI_CAP_ID_MSIX) ?
- "pcifront-msi-x" : "pcifront-msi");
- if (irq < 0) {
- ret = -1;
+ irq = xen_bind_pirq_msi_to_irq(dev, msidesc, v[i], 0,
+ (type == PCI_CAP_ID_MSIX) ?
+ "pcifront-msi-x" :
+ "pcifront-msi",
+ DOMID_SELF);
+ if (irq < 0)
goto free;
- }
-
- ret = set_irq_msi(irq, msidesc);
- if (ret)
- goto error_while;
i++;
}
kfree(v);
return 0;

-error_while:
- unbind_from_irqhandler(irq, NULL);
error:
- if (ret == -ENODEV)
- dev_err(&dev->dev, "Xen PCI frontend has not registered" \
- " MSI/MSI-X support!\n");
+ dev_err(&dev->dev, "Xen PCI frontend has not registered MSI/MSI-X support!\n");
free:
kfree(v);
return ret;
@@ -193,6 +174,9 @@ static void xen_teardown_msi_irqs(struct pci_dev *dev)
xen_pci_frontend_disable_msix(dev);
else
xen_pci_frontend_disable_msi(dev);
+
+ /* Free the IRQ's and the msidesc using the generic code. */
+ default_teardown_msi_irqs(dev);
}

static void xen_teardown_msi_irq(unsigned int irq)
@@ -200,47 +184,91 @@ static void xen_teardown_msi_irq(unsigned int irq)
xen_destroy_irq(irq);
}

+#ifdef CONFIG_XEN_DOM0
static int xen_initdom_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
{
- int irq, ret;
+ int ret = 0;
struct msi_desc *msidesc;

list_for_each_entry(msidesc, &dev->msi_list, list) {
- irq = xen_create_msi_irq(dev, msidesc, type);
- if (irq < 0)
- return -1;
+ struct physdev_map_pirq map_irq;
+ domid_t domid;

- ret = set_irq_msi(irq, msidesc);
- if (ret)
- goto error;
- }
- return 0;
+ domid = ret = xen_find_device_domain_owner(dev);
+ /* N.B. Casting int's -ENODEV to uint16_t results in 0xFFED,
+ * hence check ret value for < 0. */
+ if (ret < 0)
+ domid = DOMID_SELF;

-error:
- xen_destroy_irq(irq);
+ memset(&map_irq, 0, sizeof(map_irq));
+ map_irq.domid = domid;
+ map_irq.type = MAP_PIRQ_TYPE_MSI;
+ map_irq.index = -1;
+ map_irq.pirq = -1;
+ map_irq.bus = dev->bus->number;
+ map_irq.devfn = dev->devfn;
+
+ if (type == PCI_CAP_ID_MSIX) {
+ int pos;
+ u32 table_offset, bir;
+
+ pos = pci_find_capability(dev, PCI_CAP_ID_MSIX);
+
+ pci_read_config_dword(dev, pos + PCI_MSIX_TABLE,
+ &table_offset);
+ bir = (u8)(table_offset & PCI_MSIX_FLAGS_BIRMASK);
+
+ map_irq.table_base = pci_resource_start(dev, bir);
+ map_irq.entry_nr = msidesc->msi_attrib.entry_nr;
+ }
+
+ ret = HYPERVISOR_physdev_op(PHYSDEVOP_map_pirq, &map_irq);
+ if (ret) {
+ dev_warn(&dev->dev, "xen map irq failed %d for %d domain\n",
+ ret, domid);
+ goto out;
+ }
+
+ ret = xen_bind_pirq_msi_to_irq(dev, msidesc,
+ map_irq.pirq, map_irq.index,
+ (type == PCI_CAP_ID_MSIX) ?
+ "msi-x" : "msi",
+ domid);
+ if (ret < 0)
+ goto out;
+ }
+ ret = 0;
+out:
return ret;
}
#endif
+#endif

static int xen_pcifront_enable_irq(struct pci_dev *dev)
{
int rc;
int share = 1;
+ u8 gsi;

- dev_info(&dev->dev, "Xen PCI enabling IRQ: %d\n", dev->irq);
-
- if (dev->irq < 0)
- return -EINVAL;
+ rc = pci_read_config_byte(dev, PCI_INTERRUPT_LINE, &gsi);
+ if (rc < 0) {
+ dev_warn(&dev->dev, "Xen PCI: failed to read interrupt line: %d\n",
+ rc);
+ return rc;
+ }

- if (dev->irq < NR_IRQS_LEGACY)
+ if (gsi < NR_IRQS_LEGACY)
share = 0;

- rc = xen_allocate_pirq(dev->irq, share, "pcifront");
+ rc = xen_allocate_pirq(gsi, share, "pcifront");
if (rc < 0) {
- dev_warn(&dev->dev, "Xen PCI IRQ: %d, failed to register:%d\n",
- dev->irq, rc);
+ dev_warn(&dev->dev, "Xen PCI: failed to register GSI%d: %d\n",
+ gsi, rc);
return rc;
}
+
+ dev->irq = rc;
+ dev_info(&dev->dev, "Xen PCI mapped GSI%d to IRQ%d\n", gsi, dev->irq);
return 0;
}

@@ -427,3 +455,76 @@ void __init xen_setup_pirqs(void)
}
}
#endif
+
+struct xen_device_domain_owner {
+ domid_t domain;
+ struct pci_dev *dev;
+ struct list_head list;
+};
+
+static DEFINE_SPINLOCK(dev_domain_list_spinlock);
+static struct list_head dev_domain_list = LIST_HEAD_INIT(dev_domain_list);
+
+static struct xen_device_domain_owner *find_device(struct pci_dev *dev)
+{
+ struct xen_device_domain_owner *owner;
+
+ list_for_each_entry(owner, &dev_domain_list, list) {
+ if (owner->dev == dev)
+ return owner;
+ }
+ return NULL;
+}
+
+int xen_find_device_domain_owner(struct pci_dev *dev)
+{
+ struct xen_device_domain_owner *owner;
+ int domain = -ENODEV;
+
+ spin_lock(&dev_domain_list_spinlock);
+ owner = find_device(dev);
+ if (owner)
+ domain = owner->domain;
+ spin_unlock(&dev_domain_list_spinlock);
+ return domain;
+}
+EXPORT_SYMBOL_GPL(xen_find_device_domain_owner);
+
+int xen_register_device_domain_owner(struct pci_dev *dev, uint16_t domain)
+{
+ struct xen_device_domain_owner *owner;
+
+ owner = kzalloc(sizeof(struct xen_device_domain_owner), GFP_KERNEL);
+ if (!owner)
+ return -ENODEV;
+
+ spin_lock(&dev_domain_list_spinlock);
+ if (find_device(dev)) {
+ spin_unlock(&dev_domain_list_spinlock);
+ kfree(owner);
+ return -EEXIST;
+ }
+ owner->domain = domain;
+ owner->dev = dev;
+ list_add_tail(&owner->list, &dev_domain_list);
+ spin_unlock(&dev_domain_list_spinlock);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(xen_register_device_domain_owner);
+
+int xen_unregister_device_domain_owner(struct pci_dev *dev)
+{
+ struct xen_device_domain_owner *owner;
+
+ spin_lock(&dev_domain_list_spinlock);
+ owner = find_device(dev);
+ if (!owner) {
+ spin_unlock(&dev_domain_list_spinlock);
+ return -ENODEV;
+ }
+ list_del(&owner->list);
+ spin_unlock(&dev_domain_list_spinlock);
+ kfree(owner);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(xen_unregister_device_domain_owner);
diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig
index 5b54892..e4343fe 100644
--- a/arch/x86/xen/Kconfig
+++ b/arch/x86/xen/Kconfig
@@ -48,3 +48,11 @@ config XEN_DEBUG_FS
help
Enable statistics output and various tuning options in debugfs.
Enabling this option may incur a significant performance overhead.
+
+config XEN_DEBUG
+ bool "Enable Xen debug checks"
+ depends on XEN
+ default n
+ help
+ Enable various WARN_ON checks in the Xen MMU code.
+ Enabling this option WILL incur a significant performance overhead.
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 50542ef..49dbd78 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -1284,15 +1284,14 @@ static int init_hvm_pv_info(int *major, int *minor)

xen_setup_features();

- pv_info = xen_info;
- pv_info.kernel_rpl = 0;
+ pv_info.name = "Xen HVM";

xen_domain_type = XEN_HVM_DOMAIN;

return 0;
}

-void xen_hvm_init_shared_info(void)
+void __ref xen_hvm_init_shared_info(void)
{
int cpu;
struct xen_add_to_physmap xatp;
@@ -1331,6 +1330,8 @@ static int __cpuinit xen_hvm_cpu_notify(struct notifier_block *self,
switch (action) {
case CPU_UP_PREPARE:
per_cpu(xen_vcpu, cpu) = &HYPERVISOR_shared_info->vcpu_info[cpu];
+ if (xen_have_vector_callback)
+ xen_init_lock_cpu(cpu);
break;
default:
break;
@@ -1355,6 +1356,7 @@ static void __init xen_hvm_guest_init(void)

if (xen_feature(XENFEAT_hvm_callback_vector))
xen_have_vector_callback = 1;
+ xen_hvm_smp_init();
register_cpu_notifier(&xen_hvm_cpu_notifier);
xen_unplug_emulated_devices();
have_vcpu_info_placement = 0;
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 5e92b61..0c376a2 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -46,6 +46,7 @@
#include <linux/module.h>
#include <linux/gfp.h>
#include <linux/memblock.h>
+#include <linux/seq_file.h>

#include <asm/pgtable.h>
#include <asm/tlbflush.h>
@@ -416,8 +417,12 @@ static pteval_t pte_pfn_to_mfn(pteval_t val)
if (val & _PAGE_PRESENT) {
unsigned long pfn = (val & PTE_PFN_MASK) >> PAGE_SHIFT;
pteval_t flags = val & PTE_FLAGS_MASK;
- unsigned long mfn = pfn_to_mfn(pfn);
+ unsigned long mfn;

+ if (!xen_feature(XENFEAT_auto_translated_physmap))
+ mfn = get_phys_to_machine(pfn);
+ else
+ mfn = pfn;
/*
* If there's no mfn for the pfn, then just create an
* empty non-present pte. Unfortunately this loses
@@ -427,8 +432,18 @@ static pteval_t pte_pfn_to_mfn(pteval_t val)
if (unlikely(mfn == INVALID_P2M_ENTRY)) {
mfn = 0;
flags = 0;
+ } else {
+ /*
+ * Paramount to do this test _after_ the
+ * INVALID_P2M_ENTRY as INVALID_P2M_ENTRY &
+ * IDENTITY_FRAME_BIT resolves to true.
+ */
+ mfn &= ~FOREIGN_FRAME_BIT;
+ if (mfn & IDENTITY_FRAME_BIT) {
+ mfn &= ~IDENTITY_FRAME_BIT;
+ flags |= _PAGE_IOMAP;
+ }
}
-
val = ((pteval_t)mfn << PAGE_SHIFT) | flags;
}

@@ -532,6 +547,41 @@ pte_t xen_make_pte(pteval_t pte)
}
PV_CALLEE_SAVE_REGS_THUNK(xen_make_pte);

+#ifdef CONFIG_XEN_DEBUG
+pte_t xen_make_pte_debug(pteval_t pte)
+{
+ phys_addr_t addr = (pte & PTE_PFN_MASK);
+ phys_addr_t other_addr;
+ bool io_page = false;
+ pte_t _pte;
+
+ if (pte & _PAGE_IOMAP)
+ io_page = true;
+
+ _pte = xen_make_pte(pte);
+
+ if (!addr)
+ return _pte;
+
+ if (io_page &&
+ (xen_initial_domain() || addr >= ISA_END_ADDRESS)) {
+ other_addr = pfn_to_mfn(addr >> PAGE_SHIFT) << PAGE_SHIFT;
+ WARN(addr != other_addr,
+ "0x%lx is using VM_IO, but it is 0x%lx!\n",
+ (unsigned long)addr, (unsigned long)other_addr);
+ } else {
+ pteval_t iomap_set = (_pte.pte & PTE_FLAGS_MASK) & _PAGE_IOMAP;
+ other_addr = (_pte.pte & PTE_PFN_MASK);
+ WARN((addr == other_addr) && (!io_page) && (!iomap_set),
+ "0x%lx is missing VM_IO (and wasn't fixed)!\n",
+ (unsigned long)addr);
+ }
+
+ return _pte;
+}
+PV_CALLEE_SAVE_REGS_THUNK(xen_make_pte_debug);
+#endif
+
pgd_t xen_make_pgd(pgdval_t pgd)
{
pgd = pte_pfn_to_mfn(pgd);
@@ -1942,6 +1992,9 @@ __init void xen_ident_map_ISA(void)

static __init void xen_post_allocator_init(void)
{
+#ifdef CONFIG_XEN_DEBUG
+ pv_mmu_ops.make_pte = PV_CALLEE_SAVE(xen_make_pte_debug);
+#endif
pv_mmu_ops.set_pte = xen_set_pte;
pv_mmu_ops.set_pmd = xen_set_pmd;
pv_mmu_ops.set_pud = xen_set_pud;
@@ -2074,7 +2127,7 @@ static void xen_zap_pfn_range(unsigned long vaddr, unsigned int order,
in_frames[i] = virt_to_mfn(vaddr);

MULTI_update_va_mapping(mcs.mc, vaddr, VOID_PTE, 0);
- set_phys_to_machine(virt_to_pfn(vaddr), INVALID_P2M_ENTRY);
+ __set_phys_to_machine(virt_to_pfn(vaddr), INVALID_P2M_ENTRY);

if (out_frames)
out_frames[i] = virt_to_pfn(vaddr);
@@ -2353,6 +2406,18 @@ EXPORT_SYMBOL_GPL(xen_remap_domain_mfn_range);

#ifdef CONFIG_XEN_DEBUG_FS

+static int p2m_dump_open(struct inode *inode, struct file *filp)
+{
+ return single_open(filp, p2m_dump_show, NULL);
+}
+
+static const struct file_operations p2m_dump_fops = {
+ .open = p2m_dump_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
static struct dentry *d_mmu_debug;

static int __init xen_mmu_debugfs(void)
@@ -2408,6 +2473,7 @@ static int __init xen_mmu_debugfs(void)
debugfs_create_u32("prot_commit_batched", 0444, d_mmu_debug,
&mmu_stats.prot_commit_batched);

+ debugfs_create_file("p2m", 0600, d_mmu_debug, NULL, &p2m_dump_fops);
return 0;
}
fs_initcall(xen_mmu_debugfs);
diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c
index fd12d7c..dd5e735 100644
--- a/arch/x86/xen/p2m.c
+++ b/arch/x86/xen/p2m.c
@@ -30,6 +30,7 @@
#include <linux/list.h>
#include <linux/hash.h>
#include <linux/sched.h>
+#include <linux/seq_file.h>

#include <asm/cache.h>
#include <asm/setup.h>
@@ -59,9 +60,15 @@ static RESERVE_BRK_ARRAY(unsigned long **, p2m_top, P2M_TOP_PER_PAGE);
static RESERVE_BRK_ARRAY(unsigned long, p2m_top_mfn, P2M_TOP_PER_PAGE);
static RESERVE_BRK_ARRAY(unsigned long *, p2m_top_mfn_p, P2M_TOP_PER_PAGE);

+static RESERVE_BRK_ARRAY(unsigned long, p2m_identity, P2M_PER_PAGE);
+
RESERVE_BRK(p2m_mid, PAGE_SIZE * (MAX_DOMAIN_PAGES / (P2M_PER_PAGE * P2M_MID_PER_PAGE)));
RESERVE_BRK(p2m_mid_mfn, PAGE_SIZE * (MAX_DOMAIN_PAGES / (P2M_PER_PAGE * P2M_MID_PER_PAGE)));

+/* We might hit two boundary violations at the start and end, at max each
+ * boundary violation will require three middle nodes. */
+RESERVE_BRK(p2m_mid_identity, PAGE_SIZE * 2 * 3);
+
static inline unsigned p2m_top_index(unsigned long pfn)
{
BUG_ON(pfn >= MAX_P2M_PFN);
@@ -136,7 +143,7 @@ static void p2m_init(unsigned long *p2m)
* - After resume we're called from within stop_machine, but the mfn
* tree should alreay be completely allocated.
*/
-void xen_build_mfn_list_list(void)
+void __ref xen_build_mfn_list_list(void)
{
unsigned long pfn;

@@ -221,6 +228,9 @@ void __init xen_build_dynamic_phys_to_machine(void)
p2m_top = extend_brk(PAGE_SIZE, PAGE_SIZE);
p2m_top_init(p2m_top);

+ p2m_identity = extend_brk(PAGE_SIZE, PAGE_SIZE);
+ p2m_init(p2m_identity);
+
/*
* The domain builder gives us a pre-constructed p2m array in
* mfn_list for all the pages initially given to us, so we just
@@ -266,6 +276,14 @@ unsigned long get_phys_to_machine(unsigned long pfn)
mididx = p2m_mid_index(pfn);
idx = p2m_index(pfn);

+ /*
+ * The INVALID_P2M_ENTRY is filled in both p2m_*identity
+ * and in p2m_*missing, so returning the INVALID_P2M_ENTRY
+ * would be wrong.
+ */
+ if (p2m_top[topidx][mididx] == p2m_identity)
+ return IDENTITY_FRAME(pfn);
+
return p2m_top[topidx][mididx][idx];
}
EXPORT_SYMBOL_GPL(get_phys_to_machine);
@@ -335,9 +353,11 @@ static bool alloc_p2m(unsigned long pfn)
p2m_top_mfn_p[topidx] = mid_mfn;
}

- if (p2m_top[topidx][mididx] == p2m_missing) {
+ if (p2m_top[topidx][mididx] == p2m_identity ||
+ p2m_top[topidx][mididx] == p2m_missing) {
/* p2m leaf page is missing */
unsigned long *p2m;
+ unsigned long *p2m_orig = p2m_top[topidx][mididx];

p2m = alloc_p2m_page();
if (!p2m)
@@ -345,7 +365,7 @@ static bool alloc_p2m(unsigned long pfn)

p2m_init(p2m);

- if (cmpxchg(&mid[mididx], p2m_missing, p2m) != p2m_missing)
+ if (cmpxchg(&mid[mididx], p2m_orig, p2m) != p2m_orig)
free_p2m_page(p2m);
else
mid_mfn[mididx] = virt_to_mfn(p2m);
@@ -354,11 +374,91 @@ static bool alloc_p2m(unsigned long pfn)
return true;
}

+bool __early_alloc_p2m(unsigned long pfn)
+{
+ unsigned topidx, mididx, idx;
+
+ topidx = p2m_top_index(pfn);
+ mididx = p2m_mid_index(pfn);
+ idx = p2m_index(pfn);
+
+ /* Pfff.. No boundary cross-over, lets get out. */
+ if (!idx)
+ return false;
+
+ WARN(p2m_top[topidx][mididx] == p2m_identity,
+ "P2M[%d][%d] == IDENTITY, should be MISSING (or alloced)!\n",
+ topidx, mididx);
+
+ /*
+ * Could be done by xen_build_dynamic_phys_to_machine..
+ */
+ if (p2m_top[topidx][mididx] != p2m_missing)
+ return false;
+
+ /* Boundary cross-over for the edges: */
+ if (idx) {
+ unsigned long *p2m = extend_brk(PAGE_SIZE, PAGE_SIZE);
+
+ p2m_init(p2m);
+
+ p2m_top[topidx][mididx] = p2m;
+
+ }
+ return idx != 0;
+}
+unsigned long set_phys_range_identity(unsigned long pfn_s,
+ unsigned long pfn_e)
+{
+ unsigned long pfn;
+
+ if (unlikely(pfn_s >= MAX_P2M_PFN || pfn_e >= MAX_P2M_PFN))
+ return 0;
+
+ if (unlikely(xen_feature(XENFEAT_auto_translated_physmap)))
+ return pfn_e - pfn_s;
+
+ if (pfn_s > pfn_e)
+ return 0;
+
+ for (pfn = (pfn_s & ~(P2M_MID_PER_PAGE * P2M_PER_PAGE - 1));
+ pfn < ALIGN(pfn_e, (P2M_MID_PER_PAGE * P2M_PER_PAGE));
+ pfn += P2M_MID_PER_PAGE * P2M_PER_PAGE)
+ {
+ unsigned topidx = p2m_top_index(pfn);
+ if (p2m_top[topidx] == p2m_mid_missing) {
+ unsigned long **mid = extend_brk(PAGE_SIZE, PAGE_SIZE);
+
+ p2m_mid_init(mid);
+
+ p2m_top[topidx] = mid;
+ }
+ }
+
+ __early_alloc_p2m(pfn_s);
+ __early_alloc_p2m(pfn_e);
+
+ for (pfn = pfn_s; pfn < pfn_e; pfn++)
+ if (!__set_phys_to_machine(pfn, IDENTITY_FRAME(pfn)))
+ break;
+
+ if (!WARN((pfn - pfn_s) != (pfn_e - pfn_s),
+ "Identity mapping failed. We are %ld short of 1-1 mappings!\n",
+ (pfn_e - pfn_s) - (pfn - pfn_s)))
+ printk(KERN_DEBUG "1-1 mapping on %lx->%lx\n", pfn_s, pfn);
+
+ return pfn - pfn_s;
+}
+
/* Try to install p2m mapping; fail if intermediate bits missing */
bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn)
{
unsigned topidx, mididx, idx;

+ if (unlikely(xen_feature(XENFEAT_auto_translated_physmap))) {
+ BUG_ON(pfn != mfn && mfn != INVALID_P2M_ENTRY);
+ return true;
+ }
if (unlikely(pfn >= MAX_P2M_PFN)) {
BUG_ON(mfn != INVALID_P2M_ENTRY);
return true;
@@ -368,6 +468,21 @@ bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn)
mididx = p2m_mid_index(pfn);
idx = p2m_index(pfn);

+ /* For sparse holes were the p2m leaf has real PFN along with
+ * PCI holes, stick in the PFN as the MFN value.
+ */
+ if (mfn != INVALID_P2M_ENTRY && (mfn & IDENTITY_FRAME_BIT)) {
+ if (p2m_top[topidx][mididx] == p2m_identity)
+ return true;
+
+ /* Swap over from MISSING to IDENTITY if needed. */
+ if (p2m_top[topidx][mididx] == p2m_missing) {
+ WARN_ON(cmpxchg(&p2m_top[topidx][mididx], p2m_missing,
+ p2m_identity) != p2m_missing);
+ return true;
+ }
+ }
+
if (p2m_top[topidx][mididx] == p2m_missing)
return mfn == INVALID_P2M_ENTRY;

@@ -378,11 +493,6 @@ bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn)

bool set_phys_to_machine(unsigned long pfn, unsigned long mfn)
{
- if (unlikely(xen_feature(XENFEAT_auto_translated_physmap))) {
- BUG_ON(pfn != mfn && mfn != INVALID_P2M_ENTRY);
- return true;
- }
-
if (unlikely(!__set_phys_to_machine(pfn, mfn))) {
if (!alloc_p2m(pfn))
return false;
@@ -417,11 +527,11 @@ static unsigned long mfn_hash(unsigned long mfn)
}

/* Add an MFN override for a particular page */
-int m2p_add_override(unsigned long mfn, struct page *page)
+int m2p_add_override(unsigned long mfn, struct page *page, bool clear_pte)
{
unsigned long flags;
unsigned long pfn;
- unsigned long address;
+ unsigned long uninitialized_var(address);
unsigned level;
pte_t *ptep = NULL;

@@ -429,7 +539,6 @@ int m2p_add_override(unsigned long mfn, struct page *page)
if (!PageHighMem(page)) {
address = (unsigned long)__va(pfn << PAGE_SHIFT);
ptep = lookup_address(address, &level);
-
if (WARN(ptep == NULL || level != PG_LEVEL_4K,
"m2p_add_override: pfn %lx not mapped", pfn))
return -EINVAL;
@@ -439,10 +548,9 @@ int m2p_add_override(unsigned long mfn, struct page *page)
page->index = pfn_to_mfn(pfn);

__set_phys_to_machine(pfn, FOREIGN_FRAME(mfn));
- if (!PageHighMem(page))
+ if (clear_pte && !PageHighMem(page))
/* Just zap old mapping for now */
pte_clear(&init_mm, address, ptep);
-
spin_lock_irqsave(&m2p_override_lock, flags);
list_add(&page->lru, &m2p_overrides[mfn_hash(mfn)]);
spin_unlock_irqrestore(&m2p_override_lock, flags);
@@ -450,12 +558,12 @@ int m2p_add_override(unsigned long mfn, struct page *page)
return 0;
}

-int m2p_remove_override(struct page *page)
+int m2p_remove_override(struct page *page, bool clear_pte)
{
unsigned long flags;
unsigned long mfn;
unsigned long pfn;
- unsigned long address;
+ unsigned long uninitialized_var(address);
unsigned level;
pte_t *ptep = NULL;

@@ -478,7 +586,7 @@ int m2p_remove_override(struct page *page)
spin_unlock_irqrestore(&m2p_override_lock, flags);
__set_phys_to_machine(pfn, page->index);

- if (!PageHighMem(page))
+ if (clear_pte && !PageHighMem(page))
set_pte_at(&init_mm, address, ptep,
pfn_pte(pfn, PAGE_KERNEL));
/* No tlb flush necessary because the caller already
@@ -520,3 +628,80 @@ unsigned long m2p_find_override_pfn(unsigned long mfn, unsigned long pfn)
return ret;
}
EXPORT_SYMBOL_GPL(m2p_find_override_pfn);
+
+#ifdef CONFIG_XEN_DEBUG_FS
+
+int p2m_dump_show(struct seq_file *m, void *v)
+{
+ static const char * const level_name[] = { "top", "middle",
+ "entry", "abnormal" };
+ static const char * const type_name[] = { "identity", "missing",
+ "pfn", "abnormal"};
+#define TYPE_IDENTITY 0
+#define TYPE_MISSING 1
+#define TYPE_PFN 2
+#define TYPE_UNKNOWN 3
+ unsigned long pfn, prev_pfn_type = 0, prev_pfn_level = 0;
+ unsigned int uninitialized_var(prev_level);
+ unsigned int uninitialized_var(prev_type);
+
+ if (!p2m_top)
+ return 0;
+
+ for (pfn = 0; pfn < MAX_DOMAIN_PAGES; pfn++) {
+ unsigned topidx = p2m_top_index(pfn);
+ unsigned mididx = p2m_mid_index(pfn);
+ unsigned idx = p2m_index(pfn);
+ unsigned lvl, type;
+
+ lvl = 4;
+ type = TYPE_UNKNOWN;
+ if (p2m_top[topidx] == p2m_mid_missing) {
+ lvl = 0; type = TYPE_MISSING;
+ } else if (p2m_top[topidx] == NULL) {
+ lvl = 0; type = TYPE_UNKNOWN;
+ } else if (p2m_top[topidx][mididx] == NULL) {
+ lvl = 1; type = TYPE_UNKNOWN;
+ } else if (p2m_top[topidx][mididx] == p2m_identity) {
+ lvl = 1; type = TYPE_IDENTITY;
+ } else if (p2m_top[topidx][mididx] == p2m_missing) {
+ lvl = 1; type = TYPE_MISSING;
+ } else if (p2m_top[topidx][mididx][idx] == 0) {
+ lvl = 2; type = TYPE_UNKNOWN;
+ } else if (p2m_top[topidx][mididx][idx] == IDENTITY_FRAME(pfn)) {
+ lvl = 2; type = TYPE_IDENTITY;
+ } else if (p2m_top[topidx][mididx][idx] == INVALID_P2M_ENTRY) {
+ lvl = 2; type = TYPE_MISSING;
+ } else if (p2m_top[topidx][mididx][idx] == pfn) {
+ lvl = 2; type = TYPE_PFN;
+ } else if (p2m_top[topidx][mididx][idx] != pfn) {
+ lvl = 2; type = TYPE_PFN;
+ }
+ if (pfn == 0) {
+ prev_level = lvl;
+ prev_type = type;
+ }
+ if (pfn == MAX_DOMAIN_PAGES-1) {
+ lvl = 3;
+ type = TYPE_UNKNOWN;
+ }
+ if (prev_type != type) {
+ seq_printf(m, " [0x%lx->0x%lx] %s\n",
+ prev_pfn_type, pfn, type_name[prev_type]);
+ prev_pfn_type = pfn;
+ prev_type = type;
+ }
+ if (prev_level != lvl) {
+ seq_printf(m, " [0x%lx->0x%lx] level %s\n",
+ prev_pfn_level, pfn, level_name[prev_level]);
+ prev_pfn_level = pfn;
+ prev_level = lvl;
+ }
+ }
+ return 0;
+#undef TYPE_IDENTITY
+#undef TYPE_MISSING
+#undef TYPE_PFN
+#undef TYPE_UNKNOWN
+}
+#endif
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index a8a66a5..edeaff2 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -52,6 +52,8 @@ phys_addr_t xen_extra_mem_start, xen_extra_mem_size;

static __init void xen_add_extra_mem(unsigned long pages)
{
+ unsigned long pfn;
+
u64 size = (u64)pages * PAGE_SIZE;
u64 extra_start = xen_extra_mem_start + xen_extra_mem_size;

@@ -66,6 +68,9 @@ static __init void xen_add_extra_mem(unsigned long pages)
xen_extra_mem_size += size;

xen_max_p2m_pfn = PFN_DOWN(extra_start + size);
+
+ for (pfn = PFN_DOWN(extra_start); pfn <= xen_max_p2m_pfn; pfn++)
+ __set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
}

static unsigned long __init xen_release_chunk(phys_addr_t start_addr,
@@ -104,7 +109,7 @@ static unsigned long __init xen_release_chunk(phys_addr_t start_addr,
WARN(ret != 1, "Failed to release memory %lx-%lx err=%d\n",
start, end, ret);
if (ret == 1) {
- set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
+ __set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
len++;
}
}
@@ -138,12 +143,55 @@ static unsigned long __init xen_return_unused_memory(unsigned long max_pfn,
return released;
}

+static unsigned long __init xen_set_identity(const struct e820entry *list,
+ ssize_t map_size)
+{
+ phys_addr_t last = xen_initial_domain() ? 0 : ISA_END_ADDRESS;
+ phys_addr_t start_pci = last;
+ const struct e820entry *entry;
+ unsigned long identity = 0;
+ int i;
+
+ for (i = 0, entry = list; i < map_size; i++, entry++) {
+ phys_addr_t start = entry->addr;
+ phys_addr_t end = start + entry->size;
+
+ if (start < last)
+ start = last;
+
+ if (end <= start)
+ continue;
+
+ /* Skip over the 1MB region. */
+ if (last > end)
+ continue;
+
+ if (entry->type == E820_RAM) {
+ if (start > start_pci)
+ identity += set_phys_range_identity(
+ PFN_UP(start_pci), PFN_DOWN(start));
+
+ /* Without saving 'last' we would gooble RAM too
+ * at the end of the loop. */
+ last = end;
+ start_pci = end;
+ continue;
+ }
+ start_pci = min(start, start_pci);
+ last = end;
+ }
+ if (last > start_pci)
+ identity += set_phys_range_identity(
+ PFN_UP(start_pci), PFN_DOWN(last));
+ return identity;
+}
/**
* machine_specific_memory_setup - Hook for machine specific memory setup.
**/
char * __init xen_memory_setup(void)
{
static struct e820entry map[E820MAX] __initdata;
+ static struct e820entry map_raw[E820MAX] __initdata;

unsigned long max_pfn = xen_start_info->nr_pages;
unsigned long long mem_end;
@@ -151,6 +199,7 @@ char * __init xen_memory_setup(void)
struct xen_memory_map memmap;
unsigned long extra_pages = 0;
unsigned long extra_limit;
+ unsigned long identity_pages = 0;
int i;
int op;

@@ -176,6 +225,7 @@ char * __init xen_memory_setup(void)
}
BUG_ON(rc);

+ memcpy(map_raw, map, sizeof(map));
e820.nr_map = 0;
xen_extra_mem_start = mem_end;
for (i = 0; i < memmap.nr_entries; i++) {
@@ -194,6 +244,14 @@ char * __init xen_memory_setup(void)
end -= delta;

extra_pages += PFN_DOWN(delta);
+ /*
+ * Set RAM below 4GB that is not for us to be unusable.
+ * This prevents "System RAM" address space from being
+ * used as potential resource for I/O address (happens
+ * when 'allocate_resource' is called).
+ */
+ if (delta && end < 0x100000000UL)
+ e820_add_region(end, delta, E820_UNUSABLE);
}

if (map[i].size > 0 && end > xen_extra_mem_start)
@@ -251,6 +309,13 @@ char * __init xen_memory_setup(void)

xen_add_extra_mem(extra_pages);

+ /*
+ * Set P2M for all non-RAM pages and E820 gaps to be identity
+ * type PFNs. We supply it with the non-sanitized version
+ * of the E820.
+ */
+ identity_pages = xen_set_identity(map_raw, memmap.nr_entries);
+ printk(KERN_INFO "Set %ld page(s) to 1-1 mapping.\n", identity_pages);
return "Xen";
}

diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index 72a4c79..3061244 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -509,3 +509,41 @@ void __init xen_smp_init(void)
xen_fill_possible_map();
xen_init_spinlocks();
}
+
+static void __init xen_hvm_smp_prepare_cpus(unsigned int max_cpus)
+{
+ native_smp_prepare_cpus(max_cpus);
+ WARN_ON(xen_smp_intr_init(0));
+
+ if (!xen_have_vector_callback)
+ return;
+ xen_init_lock_cpu(0);
+ xen_init_spinlocks();
+}
+
+static int __cpuinit xen_hvm_cpu_up(unsigned int cpu)
+{
+ int rc;
+ rc = native_cpu_up(cpu);
+ WARN_ON (xen_smp_intr_init(cpu));
+ return rc;
+}
+
+static void xen_hvm_cpu_die(unsigned int cpu)
+{
+ unbind_from_irqhandler(per_cpu(xen_resched_irq, cpu), NULL);
+ unbind_from_irqhandler(per_cpu(xen_callfunc_irq, cpu), NULL);
+ unbind_from_irqhandler(per_cpu(xen_debug_irq, cpu), NULL);
+ unbind_from_irqhandler(per_cpu(xen_callfuncsingle_irq, cpu), NULL);
+ native_cpu_die(cpu);
+}
+
+void __init xen_hvm_smp_init(void)
+{
+ smp_ops.smp_prepare_cpus = xen_hvm_smp_prepare_cpus;
+ smp_ops.smp_send_reschedule = xen_smp_send_reschedule;
+ smp_ops.cpu_up = xen_hvm_cpu_up;
+ smp_ops.cpu_die = xen_hvm_cpu_die;
+ smp_ops.send_call_func_ipi = xen_smp_send_call_function_ipi;
+ smp_ops.send_call_func_single_ipi = xen_smp_send_call_function_single_ipi;
+}
diff --git a/arch/x86/xen/suspend.c b/arch/x86/xen/suspend.c
index 9bbd63a..45329c8 100644
--- a/arch/x86/xen/suspend.c
+++ b/arch/x86/xen/suspend.c
@@ -12,7 +12,7 @@
#include "xen-ops.h"
#include "mmu.h"

-void xen_pre_suspend(void)
+void xen_arch_pre_suspend(void)
{
xen_start_info->store_mfn = mfn_to_pfn(xen_start_info->store_mfn);
xen_start_info->console.domU.mfn =
@@ -26,8 +26,9 @@ void xen_pre_suspend(void)
BUG();
}

-void xen_hvm_post_suspend(int suspend_cancelled)
+void xen_arch_hvm_post_suspend(int suspend_cancelled)
{
+#ifdef CONFIG_XEN_PVHVM
int cpu;
xen_hvm_init_shared_info();
xen_callback_vector();
@@ -37,9 +38,10 @@ void xen_hvm_post_suspend(int suspend_cancelled)
xen_setup_runstate_info(cpu);
}
}
+#endif
}

-void xen_post_suspend(int suspend_cancelled)
+void xen_arch_post_suspend(int suspend_cancelled)
{
xen_build_mfn_list_list();

diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c
index 067759e..2e2d370 100644
--- a/arch/x86/xen/time.c
+++ b/arch/x86/xen/time.c
@@ -397,7 +397,9 @@ void xen_setup_timer(int cpu)
name = "<timer kasprintf failed>";

irq = bind_virq_to_irqhandler(VIRQ_TIMER, cpu, xen_timer_interrupt,
- IRQF_DISABLED|IRQF_PERCPU|IRQF_NOBALANCING|IRQF_TIMER,
+ IRQF_DISABLED|IRQF_PERCPU|
+ IRQF_NOBALANCING|IRQF_TIMER|
+ IRQF_FORCE_RESUME,
name, NULL);

evt = &per_cpu(xen_clock_events, cpu);
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
index 9d41bf9..3112f55 100644
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -64,10 +64,12 @@ void xen_setup_vcpu_info_placement(void);

#ifdef CONFIG_SMP
void xen_smp_init(void);
+void __init xen_hvm_smp_init(void);

extern cpumask_var_t xen_cpu_initialized_map;
#else
static inline void xen_smp_init(void) {}
+static inline void xen_hvm_smp_init(void) {}
#endif

#ifdef CONFIG_PARAVIRT_SPINLOCKS
diff --git a/block/blk-core.c b/block/blk-core.c
index 2f4002f..77836fc 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -455,6 +455,7 @@ void blk_put_queue(struct request_queue *q)
{
kobject_put(&q->kobj);
}
+EXPORT_SYMBOL_GPL(blk_put_queue);

void blk_cleanup_queue(struct request_queue *q)
{
@@ -662,6 +663,7 @@ int blk_get_queue(struct request_queue *q)

return 1;
}
+EXPORT_SYMBOL_GPL(blk_get_queue);

static inline void blk_free_request(struct request_queue *q, struct request *rq)
{
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index d7aa39e..9cb8668 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -120,6 +120,10 @@ static DEFINE_SPINLOCK(minor_lock);
#define EXTENDED (1<<EXT_SHIFT)
#define VDEV_IS_EXTENDED(dev) ((dev)&(EXTENDED))
#define BLKIF_MINOR_EXT(dev) ((dev)&(~EXTENDED))
+#define EMULATED_HD_DISK_MINOR_OFFSET (0)
+#define EMULATED_HD_DISK_NAME_OFFSET (EMULATED_HD_DISK_MINOR_OFFSET / 256)
+#define EMULATED_SD_DISK_MINOR_OFFSET (EMULATED_HD_DISK_MINOR_OFFSET + (4 * 16))
+#define EMULATED_SD_DISK_NAME_OFFSET (EMULATED_HD_DISK_NAME_OFFSET + 4)

#define DEV_NAME "xvd" /* name in /dev */

@@ -281,7 +285,7 @@ static int blkif_queue_request(struct request *req)
info->shadow[id].request = req;

ring_req->id = id;
- ring_req->sector_number = (blkif_sector_t)blk_rq_pos(req);
+ ring_req->u.rw.sector_number = (blkif_sector_t)blk_rq_pos(req);
ring_req->handle = info->handle;

ring_req->operation = rq_data_dir(req) ?
@@ -317,7 +321,7 @@ static int blkif_queue_request(struct request *req)
rq_data_dir(req) );

info->shadow[id].frame[i] = mfn_to_pfn(buffer_mfn);
- ring_req->seg[i] =
+ ring_req->u.rw.seg[i] =
(struct blkif_request_segment) {
.gref = ref,
.first_sect = fsect,
@@ -434,6 +438,65 @@ static void xlvbd_flush(struct blkfront_info *info)
info->feature_flush ? "enabled" : "disabled");
}

+static int xen_translate_vdev(int vdevice, int *minor, unsigned int *offset)
+{
+ int major;
+ major = BLKIF_MAJOR(vdevice);
+ *minor = BLKIF_MINOR(vdevice);
+ switch (major) {
+ case XEN_IDE0_MAJOR:
+ *offset = (*minor / 64) + EMULATED_HD_DISK_NAME_OFFSET;
+ *minor = ((*minor / 64) * PARTS_PER_DISK) +
+ EMULATED_HD_DISK_MINOR_OFFSET;
+ break;
+ case XEN_IDE1_MAJOR:
+ *offset = (*minor / 64) + 2 + EMULATED_HD_DISK_NAME_OFFSET;
+ *minor = (((*minor / 64) + 2) * PARTS_PER_DISK) +
+ EMULATED_HD_DISK_MINOR_OFFSET;
+ break;
+ case XEN_SCSI_DISK0_MAJOR:
+ *offset = (*minor / PARTS_PER_DISK) + EMULATED_SD_DISK_NAME_OFFSET;
+ *minor = *minor + EMULATED_SD_DISK_MINOR_OFFSET;
+ break;
+ case XEN_SCSI_DISK1_MAJOR:
+ case XEN_SCSI_DISK2_MAJOR:
+ case XEN_SCSI_DISK3_MAJOR:
+ case XEN_SCSI_DISK4_MAJOR:
+ case XEN_SCSI_DISK5_MAJOR:
+ case XEN_SCSI_DISK6_MAJOR:
+ case XEN_SCSI_DISK7_MAJOR:
+ *offset = (*minor / PARTS_PER_DISK) +
+ ((major - XEN_SCSI_DISK1_MAJOR + 1) * 16) +
+ EMULATED_SD_DISK_NAME_OFFSET;
+ *minor = *minor +
+ ((major - XEN_SCSI_DISK1_MAJOR + 1) * 16 * PARTS_PER_DISK) +
+ EMULATED_SD_DISK_MINOR_OFFSET;
+ break;
+ case XEN_SCSI_DISK8_MAJOR:
+ case XEN_SCSI_DISK9_MAJOR:
+ case XEN_SCSI_DISK10_MAJOR:
+ case XEN_SCSI_DISK11_MAJOR:
+ case XEN_SCSI_DISK12_MAJOR:
+ case XEN_SCSI_DISK13_MAJOR:
+ case XEN_SCSI_DISK14_MAJOR:
+ case XEN_SCSI_DISK15_MAJOR:
+ *offset = (*minor / PARTS_PER_DISK) +
+ ((major - XEN_SCSI_DISK8_MAJOR + 8) * 16) +
+ EMULATED_SD_DISK_NAME_OFFSET;
+ *minor = *minor +
+ ((major - XEN_SCSI_DISK8_MAJOR + 8) * 16 * PARTS_PER_DISK) +
+ EMULATED_SD_DISK_MINOR_OFFSET;
+ break;
+ case XENVBD_MAJOR:
+ *offset = *minor / PARTS_PER_DISK;
+ break;
+ default:
+ printk(KERN_WARNING "blkfront: your disk configuration is "
+ "incorrect, please use an xvd device instead\n");
+ return -ENODEV;
+ }
+ return 0;
+}

static int xlvbd_alloc_gendisk(blkif_sector_t capacity,
struct blkfront_info *info,
@@ -441,7 +504,7 @@ static int xlvbd_alloc_gendisk(blkif_sector_t capacity,
{
struct gendisk *gd;
int nr_minors = 1;
- int err = -ENODEV;
+ int err;
unsigned int offset;
int minor;
int nr_parts;
@@ -456,12 +519,20 @@ static int xlvbd_alloc_gendisk(blkif_sector_t capacity,
}

if (!VDEV_IS_EXTENDED(info->vdevice)) {
- minor = BLKIF_MINOR(info->vdevice);
- nr_parts = PARTS_PER_DISK;
+ err = xen_translate_vdev(info->vdevice, &minor, &offset);
+ if (err)
+ return err;
+ nr_parts = PARTS_PER_DISK;
} else {
minor = BLKIF_MINOR_EXT(info->vdevice);
nr_parts = PARTS_PER_EXT_DISK;
+ offset = minor / nr_parts;
+ if (xen_hvm_domain() && offset <= EMULATED_HD_DISK_NAME_OFFSET + 4)
+ printk(KERN_WARNING "blkfront: vdevice 0x%x might conflict with "
+ "emulated IDE disks,\n\t choose an xvd device name"
+ "from xvde on\n", info->vdevice);
}
+ err = -ENODEV;

if ((minor % nr_parts) == 0)
nr_minors = nr_parts;
@@ -475,8 +546,6 @@ static int xlvbd_alloc_gendisk(blkif_sector_t capacity,
if (gd == NULL)
goto release;

- offset = minor / nr_parts;
-
if (nr_minors > 1) {
if (offset < 26)
sprintf(gd->disk_name, "%s%c", DEV_NAME, 'a' + offset);
@@ -615,7 +684,7 @@ static void blkif_completion(struct blk_shadow *s)
{
int i;
for (i = 0; i < s->req.nr_segments; i++)
- gnttab_end_foreign_access(s->req.seg[i].gref, 0, 0UL);
+ gnttab_end_foreign_access(s->req.u.rw.seg[i].gref, 0, 0UL);
}

static irqreturn_t blkif_interrupt(int irq, void *dev_id)
@@ -932,7 +1001,7 @@ static int blkif_recover(struct blkfront_info *info)
/* Rewrite any grant references invalidated by susp/resume. */
for (j = 0; j < req->nr_segments; j++)
gnttab_grant_foreign_access_ref(
- req->seg[j].gref,
+ req->u.rw.seg[j].gref,
info->xbdev->otherend_id,
pfn_to_mfn(info->shadow[req->id].frame[j]),
rq_data_dir(info->shadow[req->id].request));
diff --git a/drivers/gpu/drm/nouveau/nouveau_mem.c b/drivers/gpu/drm/nouveau/nouveau_mem.c
index 26347b7..3706156 100644
--- a/drivers/gpu/drm/nouveau/nouveau_mem.c
+++ b/drivers/gpu/drm/nouveau/nouveau_mem.c
@@ -412,7 +412,8 @@ nouveau_mem_vram_init(struct drm_device *dev)
ret = ttm_bo_device_init(&dev_priv->ttm.bdev,
dev_priv->ttm.bo_global_ref.ref.object,
&nouveau_bo_driver, DRM_FILE_PAGE_OFFSET,
- dma_bits <= 32 ? true : false);
+ dma_bits <= 32 ? true : false,
+ dev->dev);
if (ret) {
NV_ERROR(dev, "Error initialising bo driver: %d\n", ret);
return ret;
diff --git a/drivers/gpu/drm/nouveau/nouveau_sgdma.c b/drivers/gpu/drm/nouveau/nouveau_sgdma.c
index 9a250eb..07b1151 100644
--- a/drivers/gpu/drm/nouveau/nouveau_sgdma.c
+++ b/drivers/gpu/drm/nouveau/nouveau_sgdma.c
@@ -12,6 +12,7 @@ struct nouveau_sgdma_be {
struct drm_device *dev;

dma_addr_t *pages;
+ bool *ttm_alloced;
unsigned nr_pages;

u64 offset;
@@ -20,7 +21,8 @@ struct nouveau_sgdma_be {

static int
nouveau_sgdma_populate(struct ttm_backend *be, unsigned long num_pages,
- struct page **pages, struct page *dummy_read_page)
+ struct page **pages, struct page *dummy_read_page,
+ dma_addr_t *dma_addrs)
{
struct nouveau_sgdma_be *nvbe = (struct nouveau_sgdma_be *)be;
struct drm_device *dev = nvbe->dev;
@@ -34,15 +36,25 @@ nouveau_sgdma_populate(struct ttm_backend *be, unsigned long num_pages,
if (!nvbe->pages)
return -ENOMEM;

+ nvbe->ttm_alloced = kmalloc(sizeof(bool) * num_pages, GFP_KERNEL);
+ if (!nvbe->ttm_alloced)
+ return -ENOMEM;
+
nvbe->nr_pages = 0;
while (num_pages--) {
- nvbe->pages[nvbe->nr_pages] =
- pci_map_page(dev->pdev, pages[nvbe->nr_pages], 0,
+ if (dma_addrs[nvbe->nr_pages] != DMA_ERROR_CODE) {
+ nvbe->pages[nvbe->nr_pages] =
+ dma_addrs[nvbe->nr_pages];
+ nvbe->ttm_alloced[nvbe->nr_pages] = true;
+ } else {
+ nvbe->pages[nvbe->nr_pages] =
+ pci_map_page(dev->pdev, pages[nvbe->nr_pages], 0,
PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
- if (pci_dma_mapping_error(dev->pdev,
- nvbe->pages[nvbe->nr_pages])) {
- be->func->clear(be);
- return -EFAULT;
+ if (pci_dma_mapping_error(dev->pdev,
+ nvbe->pages[nvbe->nr_pages])) {
+ be->func->clear(be);
+ return -EFAULT;
+ }
}

nvbe->nr_pages++;
@@ -65,11 +77,14 @@ nouveau_sgdma_clear(struct ttm_backend *be)
be->func->unbind(be);

while (nvbe->nr_pages--) {
- pci_unmap_page(dev->pdev, nvbe->pages[nvbe->nr_pages],
+ if (!nvbe->ttm_alloced[nvbe->nr_pages])
+ pci_unmap_page(dev->pdev, nvbe->pages[nvbe->nr_pages],
PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
}
kfree(nvbe->pages);
+ kfree(nvbe->ttm_alloced);
nvbe->pages = NULL;
+ nvbe->ttm_alloced = NULL;
nvbe->nr_pages = 0;
}
}
diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c
index d270b3f..f643133 100644
--- a/drivers/gpu/drm/radeon/evergreen.c
+++ b/drivers/gpu/drm/radeon/evergreen.c
@@ -3048,9 +3048,6 @@ int evergreen_init(struct radeon_device *rdev)
{
int r;

- r = radeon_dummy_page_init(rdev);
- if (r)
- return r;
/* This don't do much */
r = radeon_gem_init(rdev);
if (r)
@@ -3162,7 +3159,6 @@ void evergreen_fini(struct radeon_device *rdev)
radeon_atombios_fini(rdev);
kfree(rdev->bios);
rdev->bios = NULL;
- radeon_dummy_page_fini(rdev);
}

static void evergreen_pcie_gen2_enable(struct radeon_device *rdev)
diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c
index de88624..36efc45 100644
--- a/drivers/gpu/drm/radeon/r600.c
+++ b/drivers/gpu/drm/radeon/r600.c
@@ -2509,9 +2509,6 @@ int r600_init(struct radeon_device *rdev)
{
int r;

- r = radeon_dummy_page_init(rdev);
- if (r)
- return r;
if (r600_debugfs_mc_info_init(rdev)) {
DRM_ERROR("Failed to register debugfs file for mc !\n");
}
@@ -2625,7 +2622,6 @@ void r600_fini(struct radeon_device *rdev)
radeon_atombios_fini(rdev);
kfree(rdev->bios);
rdev->bios = NULL;
- radeon_dummy_page_fini(rdev);
}


diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index 56c48b6..c5955d3 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -319,6 +319,7 @@ struct radeon_gart {
union radeon_gart_table table;
struct page **pages;
dma_addr_t *pages_addr;
+ bool *ttm_alloced;
bool ready;
};

@@ -331,7 +332,8 @@ void radeon_gart_fini(struct radeon_device *rdev);
void radeon_gart_unbind(struct radeon_device *rdev, unsigned offset,
int pages);
int radeon_gart_bind(struct radeon_device *rdev, unsigned offset,
- int pages, struct page **pagelist);
+ int pages, struct page **pagelist,
+ dma_addr_t *dma_addr);


/*
diff --git a/drivers/gpu/drm/radeon/radeon_gart.c b/drivers/gpu/drm/radeon/radeon_gart.c
|
|
index 6501611..de4a86f 100644
|
|
--- a/drivers/gpu/drm/radeon/radeon_gart.c
|
|
+++ b/drivers/gpu/drm/radeon/radeon_gart.c
|
|
@@ -149,8 +149,9 @@ void radeon_gart_unbind(struct radeon_device *rdev, unsigned offset,
|
|
p = t / (PAGE_SIZE / RADEON_GPU_PAGE_SIZE);
|
|
for (i = 0; i < pages; i++, p++) {
|
|
if (rdev->gart.pages[p]) {
|
|
- pci_unmap_page(rdev->pdev, rdev->gart.pages_addr[p],
|
|
- PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
|
|
+ if (!rdev->gart.ttm_alloced[p])
|
|
+ pci_unmap_page(rdev->pdev, rdev->gart.pages_addr[p],
|
|
+ PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
|
|
rdev->gart.pages[p] = NULL;
|
|
rdev->gart.pages_addr[p] = rdev->dummy_page.addr;
|
|
page_base = rdev->gart.pages_addr[p];
|
|
@@ -165,7 +166,7 @@ void radeon_gart_unbind(struct radeon_device *rdev, unsigned offset,
|
|
}
|
|
|
|
int radeon_gart_bind(struct radeon_device *rdev, unsigned offset,
|
|
- int pages, struct page **pagelist)
|
|
+ int pages, struct page **pagelist, dma_addr_t *dma_addr)
|
|
{
|
|
unsigned t;
|
|
unsigned p;
|
|
@@ -180,15 +181,22 @@ int radeon_gart_bind(struct radeon_device *rdev, unsigned offset,
|
|
p = t / (PAGE_SIZE / RADEON_GPU_PAGE_SIZE);
|
|
|
|
for (i = 0; i < pages; i++, p++) {
|
|
- /* we need to support large memory configurations */
|
|
- /* assume that unbind have already been call on the range */
|
|
- rdev->gart.pages_addr[p] = pci_map_page(rdev->pdev, pagelist[i],
|
|
+ /* On TTM path, we only use the DMA API if TTM_PAGE_FLAG_DMA32
|
|
+ * is requested. */
|
|
+ if (dma_addr[i] != DMA_ERROR_CODE) {
|
|
+ rdev->gart.ttm_alloced[p] = true;
|
|
+ rdev->gart.pages_addr[p] = dma_addr[i];
|
|
+ } else {
|
|
+ /* we need to support large memory configurations */
|
|
+ /* assume that unbind have already been call on the range */
|
|
+ rdev->gart.pages_addr[p] = pci_map_page(rdev->pdev, pagelist[i],
|
|
0, PAGE_SIZE,
|
|
PCI_DMA_BIDIRECTIONAL);
|
|
- if (pci_dma_mapping_error(rdev->pdev, rdev->gart.pages_addr[p])) {
|
|
- /* FIXME: failed to map page (return -ENOMEM?) */
|
|
- radeon_gart_unbind(rdev, offset, pages);
|
|
- return -ENOMEM;
|
|
+ if (pci_dma_mapping_error(rdev->pdev, rdev->gart.pages_addr[p])) {
|
|
+ /* FIXME: failed to map page (return -ENOMEM?) */
|
|
+ radeon_gart_unbind(rdev, offset, pages);
|
|
+ return -ENOMEM;
|
|
+ }
|
|
}
|
|
rdev->gart.pages[p] = pagelist[i];
|
|
page_base = rdev->gart.pages_addr[p];
|
|
@@ -251,6 +259,12 @@ int radeon_gart_init(struct radeon_device *rdev)
|
|
radeon_gart_fini(rdev);
|
|
return -ENOMEM;
|
|
}
|
|
+ rdev->gart.ttm_alloced = kzalloc(sizeof(bool) *
|
|
+ rdev->gart.num_cpu_pages, GFP_KERNEL);
|
|
+ if (rdev->gart.ttm_alloced == NULL) {
|
|
+ radeon_gart_fini(rdev);
|
|
+ return -ENOMEM;
|
|
+ }
|
|
/* set GART entry to point to the dummy page by default */
|
|
for (i = 0; i < rdev->gart.num_cpu_pages; i++) {
|
|
rdev->gart.pages_addr[i] = rdev->dummy_page.addr;
|
|
@@ -267,6 +281,9 @@ void radeon_gart_fini(struct radeon_device *rdev)
|
|
rdev->gart.ready = false;
|
|
kfree(rdev->gart.pages);
|
|
kfree(rdev->gart.pages_addr);
|
|
+ kfree(rdev->gart.ttm_alloced);
|
|
rdev->gart.pages = NULL;
|
|
rdev->gart.pages_addr = NULL;
|
|
+ rdev->gart.ttm_alloced = NULL;
|
|
+ radeon_dummy_page_fini(rdev);
|
|
}
|
|
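Two invariants are worth noting in radeon_gart_unbind()/radeon_gart_bind() above: a GART slot never holds a stale translation (it is repointed at the dummy page as soon as its backing page goes away, so a wild GPU access always hits a harmless target), and pci_unmap_page() is only issued for slots this code mapped itself, as tracked by ttm_alloced[]. A standalone toy model of those two rules, using invented names (toy_gart, count_unmap) and a plain function pointer where radeon has its per-ASIC hooks:

    #include <assert.h>
    #include <stdbool.h>

    typedef unsigned long long dma_addr_t;  /* stand-in for the kernel type */

    struct toy_gart {
        dma_addr_t entry[4];    /* one translation per CPU page */
        bool ttm_alloced[4];    /* ownership bit, as in the patch */
        dma_addr_t dummy;       /* safe fallback target */
    };

    /* Unbind a range: release only mappings we created ourselves, then
     * repoint every entry at the dummy page so no translation is ever
     * left dangling. */
    static void toy_gart_unbind(struct toy_gart *g, unsigned first,
                                unsigned count, void (*unmap)(dma_addr_t))
    {
        unsigned p;

        for (p = first; p < first + count; p++) {
            if (!g->ttm_alloced[p])
                unmap(g->entry[p]);
            g->ttm_alloced[p] = false;
            g->entry[p] = g->dummy;
        }
    }

    static unsigned unmapped;
    static void count_unmap(dma_addr_t addr) { (void)addr; unmapped++; }

    int main(void)
    {
        struct toy_gart g = {
            .entry = { 0x1000, 0x2000, 0x3000, 0x4000 },
            .ttm_alloced = { false, true, false, true },
            .dummy = 0xd000,
        };

        toy_gart_unbind(&g, 0, 4, count_unmap);
        assert(unmapped == 2);          /* TTM-owned slots skipped */
        assert(g.entry[1] == 0xd000);   /* every slot repointed */
        return 0;
    }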
diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c
index e5b2cf1..371890c 100644
--- a/drivers/gpu/drm/radeon/radeon_ttm.c
+++ b/drivers/gpu/drm/radeon/radeon_ttm.c
@@ -517,7 +517,8 @@ int radeon_ttm_init(struct radeon_device *rdev)
 r = ttm_bo_device_init(&rdev->mman.bdev,
 rdev->mman.bo_global_ref.ref.object,
 &radeon_bo_driver, DRM_FILE_PAGE_OFFSET,
- rdev->need_dma32);
+ rdev->need_dma32,
+ rdev->dev);
 if (r) {
 DRM_ERROR("failed initializing buffer object driver(%d).\n", r);
 return r;
@@ -647,6 +648,7 @@ struct radeon_ttm_backend {
 unsigned long num_pages;
 struct page **pages;
 struct page *dummy_read_page;
+ dma_addr_t *dma_addrs;
 bool populated;
 bool bound;
 unsigned offset;
@@ -655,12 +657,14 @@ struct radeon_ttm_backend {
 static int radeon_ttm_backend_populate(struct ttm_backend *backend,
 unsigned long num_pages,
 struct page **pages,
- struct page *dummy_read_page)
+ struct page *dummy_read_page,
+ dma_addr_t *dma_addrs)
 {
 struct radeon_ttm_backend *gtt;

 gtt = container_of(backend, struct radeon_ttm_backend, backend);
 gtt->pages = pages;
+ gtt->dma_addrs = dma_addrs;
 gtt->num_pages = num_pages;
 gtt->dummy_read_page = dummy_read_page;
 gtt->populated = true;
@@ -673,6 +677,7 @@ static void radeon_ttm_backend_clear(struct ttm_backend *backend)

 gtt = container_of(backend, struct radeon_ttm_backend, backend);
 gtt->pages = NULL;
+ gtt->dma_addrs = NULL;
 gtt->num_pages = 0;
 gtt->dummy_read_page = NULL;
 gtt->populated = false;
@@ -693,7 +698,7 @@ static int radeon_ttm_backend_bind(struct ttm_backend *backend,
 gtt->num_pages, bo_mem, backend);
 }
 r = radeon_gart_bind(gtt->rdev, gtt->offset,
- gtt->num_pages, gtt->pages);
+ gtt->num_pages, gtt->pages, gtt->dma_addrs);
 if (r) {
 DRM_ERROR("failed to bind %lu pages at 0x%08X\n",
 gtt->num_pages, gtt->offset);
diff --git a/drivers/gpu/drm/radeon/rv770.c b/drivers/gpu/drm/radeon/rv770.c
index d8ba676..6a312e6 100644
--- a/drivers/gpu/drm/radeon/rv770.c
+++ b/drivers/gpu/drm/radeon/rv770.c
@@ -1256,9 +1256,6 @@ int rv770_init(struct radeon_device *rdev)
 {
 int r;

- r = radeon_dummy_page_init(rdev);
- if (r)
- return r;
 /* This don't do much */
 r = radeon_gem_init(rdev);
 if (r)
@@ -1373,7 +1370,6 @@ void rv770_fini(struct radeon_device *rdev)
 radeon_atombios_fini(rdev);
 kfree(rdev->bios);
 rdev->bios = NULL;
- radeon_dummy_page_fini(rdev);
 }

 static void rv770_pcie_gen2_enable(struct radeon_device *rdev)
diff --git a/drivers/gpu/drm/ttm/ttm_agp_backend.c b/drivers/gpu/drm/ttm/ttm_agp_backend.c
index f999e36..1c4a72f 100644
--- a/drivers/gpu/drm/ttm/ttm_agp_backend.c
+++ b/drivers/gpu/drm/ttm/ttm_agp_backend.c
@@ -47,7 +47,8 @@ struct ttm_agp_backend {

 static int ttm_agp_populate(struct ttm_backend *backend,
 unsigned long num_pages, struct page **pages,
- struct page *dummy_read_page)
+ struct page *dummy_read_page,
+ dma_addr_t *dma_addrs)
 {
 struct ttm_agp_backend *agp_be =
 container_of(backend, struct ttm_agp_backend, backend);
diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index af61fc2..278a2d3 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -1526,12 +1526,14 @@ int ttm_bo_device_init(struct ttm_bo_device *bdev,
 struct ttm_bo_global *glob,
 struct ttm_bo_driver *driver,
 uint64_t file_page_offset,
- bool need_dma32)
+ bool need_dma32,
+ struct device *dev)
 {
 int ret = -EINVAL;

 rwlock_init(&bdev->vm_lock);
 bdev->driver = driver;
+ bdev->dev = dev;

 memset(bdev->man, 0, sizeof(bdev->man));

diff --git a/drivers/gpu/drm/ttm/ttm_page_alloc.c b/drivers/gpu/drm/ttm/ttm_page_alloc.c
index b1e02ff..35849db 100644
--- a/drivers/gpu/drm/ttm/ttm_page_alloc.c
+++ b/drivers/gpu/drm/ttm/ttm_page_alloc.c
@@ -38,6 +38,7 @@
 #include <linux/mm.h>
 #include <linux/seq_file.h> /* for seq_printf */
 #include <linux/slab.h>
+#include <linux/dma-mapping.h>

 #include <asm/atomic.h>

@@ -662,7 +663,8 @@ out:
 * cached pages.
 */
 int ttm_get_pages(struct list_head *pages, int flags,
- enum ttm_caching_state cstate, unsigned count)
+ enum ttm_caching_state cstate, unsigned count,
+ dma_addr_t *dma_address, struct device *dev)
 {
 struct ttm_page_pool *pool = ttm_get_pool(flags, cstate);
 struct page *p = NULL;
@@ -681,14 +683,22 @@ int ttm_get_pages(struct list_head *pages, int flags,
 gfp_flags |= GFP_HIGHUSER;

 for (r = 0; r < count; ++r) {
- p = alloc_page(gfp_flags);
+ if ((flags & TTM_PAGE_FLAG_DMA32) && dma_address) {
+ void *addr;
+ addr = dma_alloc_coherent(dev, PAGE_SIZE,
+ &dma_address[r],
+ gfp_flags);
+ if (addr == NULL)
+ return -ENOMEM;
+ p = virt_to_page(addr);
+ } else
+ p = alloc_page(gfp_flags);
 if (!p) {

 printk(KERN_ERR TTM_PFX
 "Unable to allocate page.");
 return -ENOMEM;
 }
-
 list_add(&p->lru, pages);
 }
 return 0;
@@ -720,7 +730,7 @@ int ttm_get_pages(struct list_head *pages, int flags,
 printk(KERN_ERR TTM_PFX
 "Failed to allocate extra pages "
 "for large request.");
- ttm_put_pages(pages, 0, flags, cstate);
+ ttm_put_pages(pages, 0, flags, cstate, NULL, NULL);
 return r;
 }
 }
@@ -731,17 +741,30 @@ int ttm_get_pages(struct list_head *pages, int flags,

 /* Put all pages in pages list to correct pool to wait for reuse */
 void ttm_put_pages(struct list_head *pages, unsigned page_count, int flags,
- enum ttm_caching_state cstate)
+ enum ttm_caching_state cstate, dma_addr_t *dma_address,
+ struct device *dev)
 {
 unsigned long irq_flags;
 struct ttm_page_pool *pool = ttm_get_pool(flags, cstate);
 struct page *p, *tmp;
+ unsigned r;

 if (pool == NULL) {
 /* No pool for this memory type so free the pages */

+ r = page_count-1;
 list_for_each_entry_safe(p, tmp, pages, lru) {
- __free_page(p);
+ if ((flags & TTM_PAGE_FLAG_DMA32) && dma_address) {
+ void *addr = page_address(p);
+ WARN_ON(!addr || !dma_address[r]);
+ if (addr)
+ dma_free_coherent(dev, PAGE_SIZE,
+ addr,
+ dma_address[r]);
+ dma_address[r] = 0;
+ } else
+ __free_page(p);
+ r--;
 }
 /* Make the pages list empty */
 INIT_LIST_HEAD(pages);
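The allocation and release hunks above must stay symmetric: a page handed out by dma_alloc_coherent() carries a dma_addr_t that has to be presented back to dma_free_coherent(), while plain alloc_page() memory keeps using __free_page(). A compressed kernel-style sketch of just that pairing rule, assuming (as the DMA32 path here does) that the coherent buffer is a lowmem, page-backed allocation so virt_to_page()/page_address() are valid:

    #include <linux/device.h>
    #include <linux/dma-mapping.h>
    #include <linux/gfp.h>
    #include <linux/mm.h>

    /* Allocation half: the buffer's bus address is recorded in the
     * caller's dma_address slot at the moment of allocation. */
    static struct page *get_dma32_page(struct device *dev, gfp_t gfp_flags,
                                       dma_addr_t *dma_address)
    {
        void *addr = dma_alloc_coherent(dev, PAGE_SIZE, dma_address,
                                        gfp_flags);

        if (!addr)
            return NULL;
        /* assumes a lowmem, page-backed coherent allocation */
        return virt_to_page(addr);
    }

    /* Release half: must go back through dma_free_coherent() with the
     * recorded handle; __free_page() here would corrupt the DMA pool. */
    static void put_dma32_page(struct device *dev, struct page *p,
                               dma_addr_t dma_address)
    {
        dma_free_coherent(dev, PAGE_SIZE, page_address(p), dma_address);
    }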
diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c
index af789dc..354f9d9 100644
--- a/drivers/gpu/drm/ttm/ttm_tt.c
+++ b/drivers/gpu/drm/ttm/ttm_tt.c
@@ -49,12 +49,16 @@ static int ttm_tt_swapin(struct ttm_tt *ttm);
 static void ttm_tt_alloc_page_directory(struct ttm_tt *ttm)
 {
 ttm->pages = drm_calloc_large(ttm->num_pages, sizeof(*ttm->pages));
+ ttm->dma_address = drm_calloc_large(ttm->num_pages,
+ sizeof(*ttm->dma_address));
 }

 static void ttm_tt_free_page_directory(struct ttm_tt *ttm)
 {
 drm_free_large(ttm->pages);
 ttm->pages = NULL;
+ drm_free_large(ttm->dma_address);
+ ttm->dma_address = NULL;
 }

 static void ttm_tt_free_user_pages(struct ttm_tt *ttm)
@@ -105,7 +109,8 @@ static struct page *__ttm_tt_get_page(struct ttm_tt *ttm, int index)

 INIT_LIST_HEAD(&h);

- ret = ttm_get_pages(&h, ttm->page_flags, ttm->caching_state, 1);
+ ret = ttm_get_pages(&h, ttm->page_flags, ttm->caching_state, 1,
+ &ttm->dma_address[index], ttm->dev);

 if (ret != 0)
 return NULL;
@@ -164,7 +169,7 @@ int ttm_tt_populate(struct ttm_tt *ttm)
 }

 be->func->populate(be, ttm->num_pages, ttm->pages,
- ttm->dummy_read_page);
+ ttm->dummy_read_page, ttm->dma_address);
 ttm->state = tt_unbound;
 return 0;
 }
@@ -298,7 +303,8 @@ static void ttm_tt_free_alloced_pages(struct ttm_tt *ttm)
 count++;
 }
 }
- ttm_put_pages(&h, count, ttm->page_flags, ttm->caching_state);
+ ttm_put_pages(&h, count, ttm->page_flags, ttm->caching_state,
+ ttm->dma_address, ttm->dev);
 ttm->state = tt_unpopulated;
 ttm->first_himem_page = ttm->num_pages;
 ttm->last_lomem_page = -1;
@@ -391,6 +397,7 @@ struct ttm_tt *ttm_tt_create(struct ttm_bo_device *bdev, unsigned long size,
 ttm->last_lomem_page = -1;
 ttm->caching_state = tt_cached;
 ttm->page_flags = page_flags;
+ ttm->dev = bdev->dev;

 ttm->dummy_read_page = dummy_read_page;

diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c b/drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c
index 80bc37b..87e43e0 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c
@@ -102,7 +102,8 @@ struct vmw_ttm_backend {

 static int vmw_ttm_populate(struct ttm_backend *backend,
 unsigned long num_pages, struct page **pages,
- struct page *dummy_read_page)
+ struct page *dummy_read_page,
+ dma_addr_t *dma_addrs)
 {
 struct vmw_ttm_backend *vmw_be =
 container_of(backend, struct vmw_ttm_backend, backend);
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
index 10ca97e..803d979 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
@@ -322,11 +322,11 @@ static int vmw_driver_load(struct drm_device *dev, unsigned long chipset)
 ttm_lock_set_kill(&dev_priv->fbdev_master.lock, false, SIGTERM);
 dev_priv->active_master = &dev_priv->fbdev_master;

-
 ret = ttm_bo_device_init(&dev_priv->bdev,
 dev_priv->bo_global_ref.ref.object,
 &vmw_bo_driver, VMWGFX_FILE_PAGE_OFFSET,
- false);
+ false,
+ dev->dev);
 if (unlikely(ret != 0)) {
 DRM_ERROR("Failed initializing TTM buffer object driver.\n");
 goto out_err1;
diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig
index 0382332..1826d5d 100644
--- a/drivers/net/Kconfig
+++ b/drivers/net/Kconfig
@@ -2966,12 +2966,38 @@ config XEN_NETDEV_FRONTEND
 select XEN_XENBUS_FRONTEND
 default y
 help
- The network device frontend driver allows the kernel to
- access network devices exported exported by a virtual
- machine containing a physical network device driver. The
- frontend driver is intended for unprivileged guest domains;
- if you are compiling a kernel for a Xen guest, you almost
- certainly want to enable this.
+ This driver provides support for Xen paravirtual network
+ devices exported by a Xen network driver domain (often
+ domain 0).
+
+ The corresponding Linux backend driver is enabled by the
+ CONFIG_XEN_NETDEV_BACKEND option.
+
+ If you are compiling a kernel for use as a Xen guest, you
+ should say Y here. To compile this driver as a module, choose
+ M here: the module will be called xen-netfront.
+
+config XEN_NETDEV_BACKEND
+ tristate "Xen backend network device"
+ depends on XEN_BACKEND
+ help
+ This driver allows the kernel to act as a Xen network driver
+ domain which exports paravirtual network devices to other
+ Xen domains. These devices can be accessed by any operating
+ system that implements a compatible front end.
+
+ The corresponding Linux frontend driver is enabled by the
+ CONFIG_XEN_NETDEV_FRONTEND configuration option.
+
+ The backend driver presents a standard network device
+ endpoint for each paravirtual network device to the driver
+ domain network stack. These can then be bridged, routed,
+ etc. in order to provide full network connectivity.
+
+ If you are compiling a kernel to run in a Xen network driver
+ domain (often this is domain 0) you should say Y here. To
+ compile this driver as a module, choose M here: the module
+ will be called xen-netback.

 config ISERIES_VETH
 tristate "iSeries Virtual Ethernet driver support"
diff --git a/drivers/net/Makefile b/drivers/net/Makefile
index b90738d..145dfd7 100644
--- a/drivers/net/Makefile
+++ b/drivers/net/Makefile
@@ -171,6 +171,7 @@ obj-$(CONFIG_SLIP) += slip.o
 obj-$(CONFIG_SLHC) += slhc.o

 obj-$(CONFIG_XEN_NETDEV_FRONTEND) += xen-netfront.o
+obj-$(CONFIG_XEN_NETDEV_BACKEND) += xen-netback/

 obj-$(CONFIG_DUMMY) += dummy.o
 obj-$(CONFIG_IFB) += ifb.o
diff --git a/drivers/net/xen-netback/Makefile b/drivers/net/xen-netback/Makefile
new file mode 100644
index 0000000..e346e81
--- /dev/null
+++ b/drivers/net/xen-netback/Makefile
@@ -0,0 +1,3 @@
+obj-$(CONFIG_XEN_NETDEV_BACKEND) := xen-netback.o
+
+xen-netback-y := netback.o xenbus.o interface.o
diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h
new file mode 100644
index 0000000..21f4c0c
--- /dev/null
+++ b/drivers/net/xen-netback/common.h
@@ -0,0 +1,162 @@
+/*
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation; or, when distributed
+ * separately from the Linux kernel or incorporated into other
+ * software packages, subject to the following license:
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef __XEN_NETBACK__COMMON_H__
+#define __XEN_NETBACK__COMMON_H__
+
+#define pr_fmt(fmt) KBUILD_MODNAME ":%s: " fmt, __func__
+
+#include <linux/module.h>
+#include <linux/interrupt.h>
+#include <linux/slab.h>
+#include <linux/ip.h>
+#include <linux/in.h>
+#include <linux/io.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/wait.h>
+#include <linux/sched.h>
+
+#include <xen/interface/io/netif.h>
+#include <xen/interface/grant_table.h>
+#include <xen/grant_table.h>
+#include <xen/xenbus.h>
+
+struct xen_netbk;
+
+struct xenvif {
+ /* Unique identifier for this interface. */
+ domid_t domid;
+ unsigned int handle;
+
+ /* Reference to netback processing backend. */
+ struct xen_netbk *netbk;
+
+ u8 fe_dev_addr[6];
+
+ /* Physical parameters of the comms window. */
+ grant_handle_t tx_shmem_handle;
+ grant_ref_t tx_shmem_ref;
+ grant_handle_t rx_shmem_handle;
+ grant_ref_t rx_shmem_ref;
+ unsigned int irq;
+
+ /* List of frontends to notify after a batch of frames sent. */
+ struct list_head notify_list;
+
+ /* The shared rings and indexes. */
+ struct xen_netif_tx_back_ring tx;
+ struct xen_netif_rx_back_ring rx;
+ struct vm_struct *tx_comms_area;
+ struct vm_struct *rx_comms_area;
+
+ /* Flags that must not be set in dev->features */
+ int features_disabled;
+
+ /* Frontend feature information. */
+ u8 can_sg:1;
+ u8 gso:1;
+ u8 gso_prefix:1;
+ u8 csum:1;
+
+ /* Internal feature information. */
+ u8 can_queue:1; /* can queue packets for receiver? */
+
+ /*
+ * Allow xenvif_start_xmit() to peek ahead in the rx request
+ * ring. This is a prediction of what rx_req_cons will be
+ * once all queued skbs are put on the ring.
+ */
+ RING_IDX rx_req_cons_peek;
+
+ /* Transmit shaping: allow 'credit_bytes' every 'credit_usec'. */
+ unsigned long credit_bytes;
+ unsigned long credit_usec;
+ unsigned long remaining_credit;
+ struct timer_list credit_timeout;
+
+ /* Statistics */
+ int rx_gso_checksum_fixup;
+
+ /* Miscellaneous private stuff. */
+ struct list_head schedule_list;
+ atomic_t refcnt;
+ struct net_device *dev;
+ struct net_device_stats stats;
+
+ wait_queue_head_t waiting_to_free;
+};
+
+#define XEN_NETIF_TX_RING_SIZE __RING_SIZE((struct xen_netif_tx_sring *)0, PAGE_SIZE)
+#define XEN_NETIF_RX_RING_SIZE __RING_SIZE((struct xen_netif_rx_sring *)0, PAGE_SIZE)
+
+struct xenvif *xenvif_alloc(struct device *parent,
+ domid_t domid,
+ unsigned int handle);
+
+int xenvif_connect(struct xenvif *vif, unsigned long tx_ring_ref,
+ unsigned long rx_ring_ref, unsigned int evtchn);
+void xenvif_disconnect(struct xenvif *vif);
+
+void xenvif_get(struct xenvif *vif);
+void xenvif_put(struct xenvif *vif);
+
+int xenvif_xenbus_init(void);
+
+int xenvif_schedulable(struct xenvif *vif);
+
+int xen_netbk_rx_ring_full(struct xenvif *vif);
+
+int xen_netbk_must_stop_queue(struct xenvif *vif);
+
+/* (Un)Map communication rings. */
+void xen_netbk_unmap_frontend_rings(struct xenvif *vif);
+int xen_netbk_map_frontend_rings(struct xenvif *vif,
+ grant_ref_t tx_ring_ref,
+ grant_ref_t rx_ring_ref);
+
+/* (De)Register a xenvif with the netback backend. */
+void xen_netbk_add_xenvif(struct xenvif *vif);
+void xen_netbk_remove_xenvif(struct xenvif *vif);
+
+/* (De)Schedule backend processing for a xenvif */
+void xen_netbk_schedule_xenvif(struct xenvif *vif);
+void xen_netbk_deschedule_xenvif(struct xenvif *vif);
+
+/* Check for SKBs from frontend and schedule backend processing */
+void xen_netbk_check_rx_xenvif(struct xenvif *vif);
+/* Receive an SKB from the frontend */
+void xenvif_receive_skb(struct xenvif *vif, struct sk_buff *skb);
+
+/* Queue an SKB for transmission to the frontend */
+void xen_netbk_queue_tx_skb(struct xenvif *vif, struct sk_buff *skb);
+/* Notify xenvif that ring now has space to send an skb to the frontend */
+void xenvif_notify_tx_completion(struct xenvif *vif);
+
+/* Returns number of ring slots required to send an skb to the frontend */
+unsigned int xen_netbk_count_skb_slots(struct xenvif *vif, struct sk_buff *skb);
+
+#endif /* __XEN_NETBACK__COMMON_H__ */
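XEN_NETIF_TX_RING_SIZE and XEN_NETIF_RX_RING_SIZE above expand Xen's __RING_SIZE macro, which computes how many request/response slots fit in one shared page after the ring header, rounded down to a power of two so both ends can mask their free-running indices. A simplified standalone version of that computation; the 64-byte header layout and 12-byte tx slot below are assumptions matching the netif ring, not something this patch defines:

    #include <stdio.h>

    #define PAGE_SIZE 4096

    struct ring_hdr {   /* stand-in for the sring header fields */
        unsigned int req_prod, req_event;
        unsigned int rsp_prod, rsp_event;
        unsigned char pad[48];
    };

    static unsigned int ring_slots(unsigned int slot_size)
    {
        unsigned int n = (PAGE_SIZE - sizeof(struct ring_hdr)) / slot_size;

        while (n & (n - 1))     /* round down to a power of two */
            n &= n - 1;
        return n;
    }

    int main(void)
    {
        /* a netif tx slot is 12 bytes; one page yields 256 slots */
        printf("%u\n", ring_slots(12));
        return 0;
    }

Run with a 4096-byte page this prints 256, which is the tx ring size the netback arrays later in this patch are dimensioned for.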
diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c
new file mode 100644
index 0000000..1614ba5
--- /dev/null
+++ b/drivers/net/xen-netback/interface.c
@@ -0,0 +1,424 @@
+/*
+ * Network-device interface management.
+ *
+ * Copyright (c) 2004-2005, Keir Fraser
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation; or, when distributed
+ * separately from the Linux kernel or incorporated into other
+ * software packages, subject to the following license:
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "common.h"
+
+#include <linux/ethtool.h>
+#include <linux/rtnetlink.h>
+
+#include <xen/events.h>
+#include <asm/xen/hypercall.h>
+
+#define XENVIF_QUEUE_LENGTH 32
+
+void xenvif_get(struct xenvif *vif)
+{
+ atomic_inc(&vif->refcnt);
+}
+
+void xenvif_put(struct xenvif *vif)
+{
+ if (atomic_dec_and_test(&vif->refcnt))
+ wake_up(&vif->waiting_to_free);
+}
+
+int xenvif_schedulable(struct xenvif *vif)
+{
+ return netif_running(vif->dev) && netif_carrier_ok(vif->dev);
+}
+
+static int xenvif_rx_schedulable(struct xenvif *vif)
+{
+ return xenvif_schedulable(vif) && !xen_netbk_rx_ring_full(vif);
+}
+
+static irqreturn_t xenvif_interrupt(int irq, void *dev_id)
+{
+ struct xenvif *vif = dev_id;
+
+ if (vif->netbk == NULL)
+ return IRQ_NONE;
+
+ xen_netbk_schedule_xenvif(vif);
+
+ if (xenvif_rx_schedulable(vif))
+ netif_wake_queue(vif->dev);
+
+ return IRQ_HANDLED;
+}
+
+static int xenvif_start_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+ struct xenvif *vif = netdev_priv(dev);
+
+ BUG_ON(skb->dev != dev);
+
+ if (vif->netbk == NULL)
+ goto drop;
+
+ /* Drop the packet if the target domain has no receive buffers. */
+ if (!xenvif_rx_schedulable(vif))
+ goto drop;
+
+ /* Reserve ring slots for the worst-case number of fragments. */
+ vif->rx_req_cons_peek += xen_netbk_count_skb_slots(vif, skb);
+ xenvif_get(vif);
+
+ if (vif->can_queue && xen_netbk_must_stop_queue(vif))
+ netif_stop_queue(dev);
+
+ xen_netbk_queue_tx_skb(vif, skb);
+
+ return NETDEV_TX_OK;
+
+ drop:
+ vif->stats.tx_dropped++;
+ dev_kfree_skb(skb);
+ return NETDEV_TX_OK;
+}
+
+void xenvif_receive_skb(struct xenvif *vif, struct sk_buff *skb)
+{
+ netif_rx_ni(skb);
+ vif->dev->last_rx = jiffies;
+}
+
+void xenvif_notify_tx_completion(struct xenvif *vif)
+{
+ if (netif_queue_stopped(vif->dev) && xenvif_rx_schedulable(vif))
+ netif_wake_queue(vif->dev);
+}
+
+static struct net_device_stats *xenvif_get_stats(struct net_device *dev)
+{
+ struct xenvif *vif = netdev_priv(dev);
+ return &vif->stats;
+}
+
+static void xenvif_up(struct xenvif *vif)
+{
+ xen_netbk_add_xenvif(vif);
+ enable_irq(vif->irq);
+ xen_netbk_check_rx_xenvif(vif);
+}
+
+static void xenvif_down(struct xenvif *vif)
+{
+ disable_irq(vif->irq);
+ xen_netbk_deschedule_xenvif(vif);
+ xen_netbk_remove_xenvif(vif);
+}
+
+static int xenvif_open(struct net_device *dev)
+{
+ struct xenvif *vif = netdev_priv(dev);
+ if (netif_carrier_ok(dev))
+ xenvif_up(vif);
+ netif_start_queue(dev);
+ return 0;
+}
+
+static int xenvif_close(struct net_device *dev)
+{
+ struct xenvif *vif = netdev_priv(dev);
+ if (netif_carrier_ok(dev))
+ xenvif_down(vif);
+ netif_stop_queue(dev);
+ return 0;
+}
+
+static int xenvif_change_mtu(struct net_device *dev, int mtu)
+{
+ struct xenvif *vif = netdev_priv(dev);
+ int max = vif->can_sg ? 65535 - ETH_HLEN : ETH_DATA_LEN;
+
+ if (mtu > max)
+ return -EINVAL;
+ dev->mtu = mtu;
+ return 0;
+}
+
+static void xenvif_set_features(struct xenvif *vif)
+{
+ struct net_device *dev = vif->dev;
+ int features = dev->features;
+
+ if (vif->can_sg)
+ features |= NETIF_F_SG;
+ if (vif->gso || vif->gso_prefix)
+ features |= NETIF_F_TSO;
+ if (vif->csum)
+ features |= NETIF_F_IP_CSUM;
+
+ features &= ~(vif->features_disabled);
+
+ if (!(features & NETIF_F_SG) && dev->mtu > ETH_DATA_LEN)
+ dev->mtu = ETH_DATA_LEN;
+
+ dev->features = features;
+}
+
+static int xenvif_set_tx_csum(struct net_device *dev, u32 data)
+{
+ struct xenvif *vif = netdev_priv(dev);
+ if (data) {
+ if (!vif->csum)
+ return -EOPNOTSUPP;
+ vif->features_disabled &= ~NETIF_F_IP_CSUM;
+ } else {
+ vif->features_disabled |= NETIF_F_IP_CSUM;
+ }
+
+ xenvif_set_features(vif);
+ return 0;
+}
+
+static int xenvif_set_sg(struct net_device *dev, u32 data)
+{
+ struct xenvif *vif = netdev_priv(dev);
+ if (data) {
+ if (!vif->can_sg)
+ return -EOPNOTSUPP;
+ vif->features_disabled &= ~NETIF_F_SG;
+ } else {
+ vif->features_disabled |= NETIF_F_SG;
+ }
+
+ xenvif_set_features(vif);
+ return 0;
+}
+
+static int xenvif_set_tso(struct net_device *dev, u32 data)
+{
+ struct xenvif *vif = netdev_priv(dev);
+ if (data) {
+ if (!vif->gso && !vif->gso_prefix)
+ return -EOPNOTSUPP;
+ vif->features_disabled &= ~NETIF_F_TSO;
+ } else {
+ vif->features_disabled |= NETIF_F_TSO;
+ }
+
+ xenvif_set_features(vif);
+ return 0;
+}
+
+static const struct xenvif_stat {
+ char name[ETH_GSTRING_LEN];
+ u16 offset;
+} xenvif_stats[] = {
+ {
+ "rx_gso_checksum_fixup",
+ offsetof(struct xenvif, rx_gso_checksum_fixup)
+ },
+};
+
+static int xenvif_get_sset_count(struct net_device *dev, int string_set)
+{
+ switch (string_set) {
+ case ETH_SS_STATS:
+ return ARRAY_SIZE(xenvif_stats);
+ default:
+ return -EINVAL;
+ }
+}
+
+static void xenvif_get_ethtool_stats(struct net_device *dev,
+ struct ethtool_stats *stats, u64 * data)
+{
+ void *vif = netdev_priv(dev);
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(xenvif_stats); i++)
+ data[i] = *(int *)(vif + xenvif_stats[i].offset);
+}
+
+static void xenvif_get_strings(struct net_device *dev, u32 stringset, u8 * data)
+{
+ int i;
+
+ switch (stringset) {
+ case ETH_SS_STATS:
+ for (i = 0; i < ARRAY_SIZE(xenvif_stats); i++)
+ memcpy(data + i * ETH_GSTRING_LEN,
+ xenvif_stats[i].name, ETH_GSTRING_LEN);
+ break;
+ }
+}
+
+static struct ethtool_ops xenvif_ethtool_ops = {
+ .get_tx_csum = ethtool_op_get_tx_csum,
+ .set_tx_csum = xenvif_set_tx_csum,
+ .get_sg = ethtool_op_get_sg,
+ .set_sg = xenvif_set_sg,
+ .get_tso = ethtool_op_get_tso,
+ .set_tso = xenvif_set_tso,
+ .get_link = ethtool_op_get_link,
+
+ .get_sset_count = xenvif_get_sset_count,
+ .get_ethtool_stats = xenvif_get_ethtool_stats,
+ .get_strings = xenvif_get_strings,
+};
+
+static struct net_device_ops xenvif_netdev_ops = {
+ .ndo_start_xmit = xenvif_start_xmit,
+ .ndo_get_stats = xenvif_get_stats,
+ .ndo_open = xenvif_open,
+ .ndo_stop = xenvif_close,
+ .ndo_change_mtu = xenvif_change_mtu,
+};
+
+struct xenvif *xenvif_alloc(struct device *parent, domid_t domid,
+ unsigned int handle)
+{
+ int err;
+ struct net_device *dev;
+ struct xenvif *vif;
+ char name[IFNAMSIZ] = {};
+
+ snprintf(name, IFNAMSIZ - 1, "vif%u.%u", domid, handle);
+ dev = alloc_netdev(sizeof(struct xenvif), name, ether_setup);
+ if (dev == NULL) {
+ pr_warn("Could not allocate netdev\n");
+ return ERR_PTR(-ENOMEM);
+ }
+
+ SET_NETDEV_DEV(dev, parent);
+
+ vif = netdev_priv(dev);
+ vif->domid = domid;
+ vif->handle = handle;
+ vif->netbk = NULL;
+ vif->can_sg = 1;
+ vif->csum = 1;
+ atomic_set(&vif->refcnt, 1);
+ init_waitqueue_head(&vif->waiting_to_free);
+ vif->dev = dev;
+ INIT_LIST_HEAD(&vif->schedule_list);
+ INIT_LIST_HEAD(&vif->notify_list);
+
+ vif->credit_bytes = vif->remaining_credit = ~0UL;
+ vif->credit_usec = 0UL;
+ init_timer(&vif->credit_timeout);
+ /* Initialize 'expires' now: it's used to track the credit window. */
+ vif->credit_timeout.expires = jiffies;
+
+ dev->netdev_ops = &xenvif_netdev_ops;
+ xenvif_set_features(vif);
+ SET_ETHTOOL_OPS(dev, &xenvif_ethtool_ops);
+
+ dev->tx_queue_len = XENVIF_QUEUE_LENGTH;
+
+ /*
+ * Initialise a dummy MAC address. We choose the numerically
+ * largest non-broadcast address to prevent the address getting
+ * stolen by an Ethernet bridge for STP purposes.
+ * (FE:FF:FF:FF:FF:FF)
+ */
+ memset(dev->dev_addr, 0xFF, ETH_ALEN);
+ dev->dev_addr[0] &= ~0x01;
+
+ netif_carrier_off(dev);
+
+ err = register_netdev(dev);
+ if (err) {
+ netdev_warn(dev, "Could not register device: err=%d\n", err);
+ free_netdev(dev);
+ return ERR_PTR(err);
+ }
+
+ netdev_dbg(dev, "Successfully created xenvif\n");
+ return vif;
+}
+
+int xenvif_connect(struct xenvif *vif, unsigned long tx_ring_ref,
+ unsigned long rx_ring_ref, unsigned int evtchn)
+{
+ int err = -ENOMEM;
+
+ /* Already connected through? */
+ if (vif->irq)
+ return 0;
+
+ xenvif_set_features(vif);
+
+ err = xen_netbk_map_frontend_rings(vif, tx_ring_ref, rx_ring_ref);
+ if (err < 0)
+ goto err;
+
+ err = bind_interdomain_evtchn_to_irqhandler(
+ vif->domid, evtchn, xenvif_interrupt, 0,
+ vif->dev->name, vif);
+ if (err < 0)
+ goto err_unmap;
+ vif->irq = err;
+ disable_irq(vif->irq);
+
+ xenvif_get(vif);
+
+ rtnl_lock();
+ netif_carrier_on(vif->dev);
+ if (netif_running(vif->dev))
+ xenvif_up(vif);
+ rtnl_unlock();
+
+ return 0;
+err_unmap:
+ xen_netbk_unmap_frontend_rings(vif);
+err:
+ return err;
+}
+
+void xenvif_disconnect(struct xenvif *vif)
+{
+ struct net_device *dev = vif->dev;
+ if (netif_carrier_ok(dev)) {
+ rtnl_lock();
+ netif_carrier_off(dev); /* discard queued packets */
+ if (netif_running(dev))
+ xenvif_down(vif);
+ rtnl_unlock();
+ xenvif_put(vif);
+ }
+
+ atomic_dec(&vif->refcnt);
+ wait_event(vif->waiting_to_free, atomic_read(&vif->refcnt) == 0);
+
+ del_timer_sync(&vif->credit_timeout);
+
+ if (vif->irq)
+ unbind_from_irqhandler(vif->irq, vif);
+
+ unregister_netdev(vif->dev);
+
+ xen_netbk_unmap_frontend_rings(vif);
+
+ free_netdev(vif->dev);
+}
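xenvif_disconnect() above leans on a small lifecycle idiom: the interface is created holding one reference, teardown drops that initial reference and then sleeps until every transient holder (queued skbs, schedule-list entries) has called xenvif_put(), whose final decrement performs the wake-up. A userspace model of the same shape, with C11 atomics and a condition variable standing in for the kernel's atomic_t and wait_event()/wake_up() (fields are assumed initialised elsewhere with refcnt = 1 and the pthread initialisers):

    #include <pthread.h>
    #include <stdatomic.h>

    struct obj {
        atomic_int refcnt;      /* starts at 1: the creator's reference */
        pthread_mutex_t lock;
        pthread_cond_t all_gone;
    };

    static void obj_get(struct obj *o)
    {
        atomic_fetch_add(&o->refcnt, 1);
    }

    static void obj_put(struct obj *o)
    {
        if (atomic_fetch_sub(&o->refcnt, 1) == 1) {     /* hit zero */
            pthread_mutex_lock(&o->lock);
            pthread_cond_broadcast(&o->all_gone);       /* wake_up() */
            pthread_mutex_unlock(&o->lock);
        }
    }

    /* Disconnect: drop the initial reference, wait for stragglers. */
    static void obj_teardown(struct obj *o)
    {
        obj_put(o);
        pthread_mutex_lock(&o->lock);
        while (atomic_load(&o->refcnt) != 0)            /* wait_event() */
            pthread_cond_wait(&o->all_gone, &o->lock);
        pthread_mutex_unlock(&o->lock);
        /* now safe to unregister and free the object */
    }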
diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c
new file mode 100644
index 0000000..c2669b8
--- /dev/null
+++ b/drivers/net/xen-netback/netback.c
@@ -0,0 +1,1745 @@
+/*
+ * Back-end of the driver for virtual network devices. This portion of the
+ * driver exports a 'unified' network-device interface that can be accessed
+ * by any operating system that implements a compatible front end. A
+ * reference front-end implementation can be found in:
+ * drivers/net/xen-netfront.c
+ *
+ * Copyright (c) 2002-2005, K A Fraser
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation; or, when distributed
+ * separately from the Linux kernel or incorporated into other
+ * software packages, subject to the following license:
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "common.h"
+
+#include <linux/kthread.h>
+#include <linux/if_vlan.h>
+#include <linux/udp.h>
+
+#include <net/tcp.h>
+
+#include <xen/events.h>
+#include <xen/interface/memory.h>
+
+#include <asm/xen/hypercall.h>
+#include <asm/xen/page.h>
+
+struct pending_tx_info {
+ struct xen_netif_tx_request req;
+ struct xenvif *vif;
+};
+typedef unsigned int pending_ring_idx_t;
+
+struct netbk_rx_meta {
+ int id;
+ int size;
+ int gso_size;
+};
+
+#define MAX_PENDING_REQS 256
+
+#define MAX_BUFFER_OFFSET PAGE_SIZE
+
+/* extra field used in struct page */
+union page_ext {
+ struct {
+#if BITS_PER_LONG < 64
+#define IDX_WIDTH 8
+#define GROUP_WIDTH (BITS_PER_LONG - IDX_WIDTH)
+ unsigned int group:GROUP_WIDTH;
+ unsigned int idx:IDX_WIDTH;
+#else
+ unsigned int group, idx;
+#endif
+ } e;
+ void *mapping;
+};
+
+struct xen_netbk {
+ wait_queue_head_t wq;
+ struct task_struct *task;
+
+ struct sk_buff_head rx_queue;
+ struct sk_buff_head tx_queue;
+
+ struct timer_list net_timer;
+
+ struct page *mmap_pages[MAX_PENDING_REQS];
+
+ pending_ring_idx_t pending_prod;
+ pending_ring_idx_t pending_cons;
+ struct list_head net_schedule_list;
+
+ /* Protect the net_schedule_list in netif. */
+ spinlock_t net_schedule_list_lock;
+
+ atomic_t netfront_count;
+
+ struct pending_tx_info pending_tx_info[MAX_PENDING_REQS];
+ struct gnttab_copy tx_copy_ops[MAX_PENDING_REQS];
+
+ u16 pending_ring[MAX_PENDING_REQS];
+
+ /*
+ * Given MAX_BUFFER_OFFSET of 4096 the worst case is that each
+ * head/fragment page uses 2 copy operations because it
+ * straddles two buffers in the frontend.
+ */
+ struct gnttab_copy grant_copy_op[2*XEN_NETIF_RX_RING_SIZE];
+ struct netbk_rx_meta meta[2*XEN_NETIF_RX_RING_SIZE];
+};
+
+static struct xen_netbk *xen_netbk;
+static int xen_netbk_group_nr;
+
+void xen_netbk_add_xenvif(struct xenvif *vif)
+{
+ int i;
+ int min_netfront_count;
+ int min_group = 0;
+ struct xen_netbk *netbk;
+
+ min_netfront_count = atomic_read(&xen_netbk[0].netfront_count);
+ for (i = 0; i < xen_netbk_group_nr; i++) {
+ int netfront_count = atomic_read(&xen_netbk[i].netfront_count);
+ if (netfront_count < min_netfront_count) {
+ min_group = i;
+ min_netfront_count = netfront_count;
+ }
+ }
+
+ netbk = &xen_netbk[min_group];
+
+ vif->netbk = netbk;
+ atomic_inc(&netbk->netfront_count);
+}
+
+void xen_netbk_remove_xenvif(struct xenvif *vif)
+{
+ struct xen_netbk *netbk = vif->netbk;
+ vif->netbk = NULL;
+ atomic_dec(&netbk->netfront_count);
+}
+
+static void xen_netbk_idx_release(struct xen_netbk *netbk, u16 pending_idx);
+static void make_tx_response(struct xenvif *vif,
+ struct xen_netif_tx_request *txp,
+ s8 st);
+static struct xen_netif_rx_response *make_rx_response(struct xenvif *vif,
+ u16 id,
+ s8 st,
+ u16 offset,
+ u16 size,
+ u16 flags);
+
+static inline unsigned long idx_to_pfn(struct xen_netbk *netbk,
+ unsigned int idx)
+{
+ return page_to_pfn(netbk->mmap_pages[idx]);
+}
+
+static inline unsigned long idx_to_kaddr(struct xen_netbk *netbk,
+ unsigned int idx)
+{
+ return (unsigned long)pfn_to_kaddr(idx_to_pfn(netbk, idx));
+}
+
+/* extra field used in struct page */
+static inline void set_page_ext(struct page *pg, struct xen_netbk *netbk,
+ unsigned int idx)
+{
+ unsigned int group = netbk - xen_netbk;
+ union page_ext ext = { .e = { .group = group + 1, .idx = idx } };
+
+ BUILD_BUG_ON(sizeof(ext) > sizeof(ext.mapping));
+ pg->mapping = ext.mapping;
+}
+
+static int get_page_ext(struct page *pg,
+ unsigned int *pgroup, unsigned int *pidx)
+{
+ union page_ext ext = { .mapping = pg->mapping };
+ struct xen_netbk *netbk;
+ unsigned int group, idx;
+
+ group = ext.e.group - 1;
+
+ if (group < 0 || group >= xen_netbk_group_nr)
+ return 0;
+
+ netbk = &xen_netbk[group];
+
+ idx = ext.e.idx;
+
+ if ((idx < 0) || (idx >= MAX_PENDING_REQS))
+ return 0;
+
+ if (netbk->mmap_pages[idx] != pg)
+ return 0;
+
+ *pgroup = group;
+ *pidx = idx;
+
+ return 1;
+}
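set_page_ext()/get_page_ext() above smuggle a (group, idx) pair through the otherwise unused page->mapping pointer, biasing group by one so an untouched NULL mapping can never decode as group 0. A standalone model of the encode/decode pair; the 24/8 field split is an assumption matching the 32-bit layout (the real code sizes the fields from BITS_PER_LONG), and the memset avoids leaving pointer bytes indeterminate where the struct is narrower than the pointer:

    #include <assert.h>
    #include <string.h>

    union page_ext {
        struct {
            unsigned int group:24;  /* kernel: BITS_PER_LONG - 8 */
            unsigned int idx:8;
        } e;
        void *mapping;
    };

    static void *encode(unsigned int group, unsigned int idx)
    {
        union page_ext ext;

        memset(&ext, 0, sizeof(ext));   /* pointer may be wider than e */
        ext.e.group = group + 1;        /* bias: NULL never decodes */
        ext.e.idx = idx;
        return ext.mapping;
    }

    static int decode(void *mapping, unsigned int *group, unsigned int *idx)
    {
        union page_ext ext = { .mapping = mapping };

        if (ext.e.group == 0)           /* mapping was never set */
            return 0;
        *group = ext.e.group - 1;
        *idx = ext.e.idx;
        return 1;
    }

    int main(void)
    {
        unsigned int g, i;

        assert(decode(encode(3, 200), &g, &i) && g == 3 && i == 200);
        assert(!decode(NULL, &g, &i));
        return 0;
    }

Note the real code still cross-checks mmap_pages[idx] against the page, since any page in the system could carry an arbitrary mapping value.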
+
+/*
+ * This is the amount of packet we copy rather than map, so that the
+ * guest can't fiddle with the contents of the headers while we do
+ * packet processing on them (netfilter, routing, etc).
+ */
+#define PKT_PROT_LEN (ETH_HLEN + \
+ VLAN_HLEN + \
+ sizeof(struct iphdr) + MAX_IPOPTLEN + \
+ sizeof(struct tcphdr) + MAX_TCP_OPTION_SPACE)
+
+static inline pending_ring_idx_t pending_index(unsigned i)
+{
+ return i & (MAX_PENDING_REQS-1);
+}
+
+static inline pending_ring_idx_t nr_pending_reqs(struct xen_netbk *netbk)
+{
+ return MAX_PENDING_REQS -
+ netbk->pending_prod + netbk->pending_cons;
+}
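pending_index()/nr_pending_reqs() above are the standard power-of-two circular buffer trick: producer and consumer counters run freely and may wrap, unsigned subtraction keeps the occupancy arithmetic correct across the wrap, and a mask converts a counter into a physical slot. A standalone check of that arithmetic, assuming netback's initial state of pending_prod = MAX_PENDING_REQS (all slot indices free) and pending_cons = 0:

    #include <assert.h>

    #define MAX_PENDING_REQS 256    /* power of two, so the mask works */

    static unsigned pending_index(unsigned i)
    {
        return i & (MAX_PENDING_REQS - 1);
    }

    /* pending_prod counts slot indices returned to the ring,
     * pending_cons counts indices taken out; in-flight requests: */
    static unsigned nr_pending_reqs(unsigned pending_prod,
                                    unsigned pending_cons)
    {
        return MAX_PENDING_REQS - pending_prod + pending_cons;
    }

    int main(void)
    {
        /* Initially prod = 256 (all indices free), cons = 0. */
        assert(nr_pending_reqs(256, 0) == 0);
        /* Consume 40 free indices, then release 10 of them back: */
        assert(nr_pending_reqs(256 + 10, 40) == 30);
        /* Counter 300 wraps onto physical slot 44. */
        assert(pending_index(300) == 44);
        return 0;
    }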
+
+static void xen_netbk_kick_thread(struct xen_netbk *netbk)
+{
+ wake_up(&netbk->wq);
+}
+
+static int max_required_rx_slots(struct xenvif *vif)
+{
+ int max = DIV_ROUND_UP(vif->dev->mtu, PAGE_SIZE);
+
+ if (vif->can_sg || vif->gso || vif->gso_prefix)
+ max += MAX_SKB_FRAGS + 1; /* extra_info + frags */
+
+ return max;
+}
+
+int xen_netbk_rx_ring_full(struct xenvif *vif)
+{
+ RING_IDX peek = vif->rx_req_cons_peek;
+ RING_IDX needed = max_required_rx_slots(vif);
+
+ return ((vif->rx.sring->req_prod - peek) < needed) ||
+ ((vif->rx.rsp_prod_pvt + XEN_NETIF_RX_RING_SIZE - peek) < needed);
+}
+
+int xen_netbk_must_stop_queue(struct xenvif *vif)
+{
+ if (!xen_netbk_rx_ring_full(vif))
+ return 0;
+
+ vif->rx.sring->req_event = vif->rx_req_cons_peek +
+ max_required_rx_slots(vif);
+ mb(); /* request notification /then/ check the queue */
+
+ return xen_netbk_rx_ring_full(vif);
+}
+
+/*
+ * Returns true if we should start a new receive buffer instead of
+ * adding 'size' bytes to a buffer which currently contains 'offset'
+ * bytes.
+ */
+static bool start_new_rx_buffer(int offset, unsigned long size, int head)
+{
+ /* simple case: we have completely filled the current buffer. */
+ if (offset == MAX_BUFFER_OFFSET)
+ return true;
+
+ /*
+ * complex case: start a fresh buffer if the current frag
+ * would overflow the current buffer but only if:
+ * (i) this frag would fit completely in the next buffer
+ * and (ii) there is already some data in the current buffer
+ * and (iii) this is not the head buffer.
+ *
+ * Where:
+ * - (i) stops us splitting a frag into two copies
+ * unless the frag is too large for a single buffer.
+ * - (ii) stops us from leaving a buffer pointlessly empty.
+ * - (iii) stops us leaving the first buffer
+ * empty. Strictly speaking this is already covered
+ * by (ii) but is explicitly checked because
+ * netfront relies on the first buffer being
+ * non-empty and can crash otherwise.
+ *
+ * This means we will effectively linearise small
+ * frags but do not needlessly split large buffers
+ * into multiple copies; large frags still tend to
+ * get their own buffers as before.
+ */
+ if ((offset + size > MAX_BUFFER_OFFSET) &&
+ (size <= MAX_BUFFER_OFFSET) && offset && !head)
+ return true;
+
+ return false;
+}
+
+/*
+ * Figure out how many ring slots we're going to need to send @skb to
+ * the guest. This function is essentially a dry run of
+ * netbk_gop_frag_copy.
+ */
+unsigned int xen_netbk_count_skb_slots(struct xenvif *vif, struct sk_buff *skb)
+{
+ unsigned int count;
+ int i, copy_off;
+
+ count = DIV_ROUND_UP(
+ offset_in_page(skb->data)+skb_headlen(skb), PAGE_SIZE);
+
+ copy_off = skb_headlen(skb) % PAGE_SIZE;
+
+ if (skb_shinfo(skb)->gso_size)
+ count++;
+
+ for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+ unsigned long size = skb_shinfo(skb)->frags[i].size;
+ unsigned long bytes;
+ while (size > 0) {
+ BUG_ON(copy_off > MAX_BUFFER_OFFSET);
+
+ if (start_new_rx_buffer(copy_off, size, 0)) {
+ count++;
+ copy_off = 0;
+ }
+
+ bytes = size;
+ if (copy_off + bytes > MAX_BUFFER_OFFSET)
+ bytes = MAX_BUFFER_OFFSET - copy_off;
+
+ copy_off += bytes;
+ size -= bytes;
+ }
+ }
+ return count;
+}
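Since xen_netbk_count_skb_slots() is a dry run of the copy loop, its results can be reproduced in isolation. The sketch below re-implements the inner loop in userspace under the same MAX_BUFFER_OFFSET == 4096 assumption (simplified to a sub-page linear area), showing the two interesting cases: a frag larger than one buffer is split across the tail of the current buffer and a fresh one, while a frag that would fit whole in an empty buffer triggers rule (i) and moves there unsplit:

    #include <assert.h>

    #define MAX_BUFFER_OFFSET 4096

    /* Slots needed for head_len bytes of headers (head_len < 4096 here)
     * plus one fragment of frag_size bytes, mirroring the kernel loop. */
    static int count_slots(int head_len, int frag_size)
    {
        int count = 1;              /* buffer holding the linear head */
        int copy_off = head_len;
        int size = frag_size;

        while (size > 0) {
            int bytes;

            /* start_new_rx_buffer() with head == 0 */
            if (copy_off == MAX_BUFFER_OFFSET ||
                (copy_off + size > MAX_BUFFER_OFFSET &&
                 size <= MAX_BUFFER_OFFSET && copy_off)) {
                count++;
                copy_off = 0;
            }
            bytes = size;
            if (copy_off + bytes > MAX_BUFFER_OFFSET)
                bytes = MAX_BUFFER_OFFSET - copy_off;
            copy_off += bytes;
            size -= bytes;
        }
        return count;
    }

    int main(void)
    {
        /* A 6000-byte frag can't fit in any single buffer: it tops up
         * the head buffer (3996 bytes) and spills 2004 into slot two. */
        assert(count_slots(100, 6000) == 2);
        /* A 3000-byte frag would fit whole in an empty buffer, so rule
         * (i) starts a fresh one instead of splitting: also 2 slots. */
        assert(count_slots(2000, 3000) == 2);
        return 0;
    }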
+
+struct netrx_pending_operations {
+ unsigned copy_prod, copy_cons;
+ unsigned meta_prod, meta_cons;
+ struct gnttab_copy *copy;
+ struct netbk_rx_meta *meta;
+ int copy_off;
+ grant_ref_t copy_gref;
+};
+
+static struct netbk_rx_meta *get_next_rx_buffer(struct xenvif *vif,
+ struct netrx_pending_operations *npo)
+{
+ struct netbk_rx_meta *meta;
+ struct xen_netif_rx_request *req;
+
+ req = RING_GET_REQUEST(&vif->rx, vif->rx.req_cons++);
+
+ meta = npo->meta + npo->meta_prod++;
+ meta->gso_size = 0;
+ meta->size = 0;
+ meta->id = req->id;
+
+ npo->copy_off = 0;
+ npo->copy_gref = req->gref;
+
+ return meta;
+}
+
+/*
+ * Set up the grant operations for this fragment. If it's a flipping
+ * interface, we also set up the unmap request from here.
+ */
+static void netbk_gop_frag_copy(struct xenvif *vif, struct sk_buff *skb,
+ struct netrx_pending_operations *npo,
+ struct page *page, unsigned long size,
+ unsigned long offset, int *head)
+{
+ struct gnttab_copy *copy_gop;
+ struct netbk_rx_meta *meta;
+ /*
+ * These variables are used iff get_page_ext returns true,
+ * in which case they are guaranteed to be initialized.
+ */
+ unsigned int uninitialized_var(group), uninitialized_var(idx);
+ int foreign = get_page_ext(page, &group, &idx);
+ unsigned long bytes;
+
+ /* Data must not cross a page boundary. */
+ BUG_ON(size + offset > PAGE_SIZE);
+
+ meta = npo->meta + npo->meta_prod - 1;
+
+ while (size > 0) {
+ BUG_ON(npo->copy_off > MAX_BUFFER_OFFSET);
+
+ if (start_new_rx_buffer(npo->copy_off, size, *head)) {
+ /*
+ * Netfront requires there to be some data in the head
+ * buffer.
+ */
+ BUG_ON(*head);
+
+ meta = get_next_rx_buffer(vif, npo);
+ }
+
+ bytes = size;
+ if (npo->copy_off + bytes > MAX_BUFFER_OFFSET)
+ bytes = MAX_BUFFER_OFFSET - npo->copy_off;
+
+ copy_gop = npo->copy + npo->copy_prod++;
+ copy_gop->flags = GNTCOPY_dest_gref;
+ if (foreign) {
+ struct xen_netbk *netbk = &xen_netbk[group];
+ struct pending_tx_info *src_pend;
+
+ src_pend = &netbk->pending_tx_info[idx];
+
+ copy_gop->source.domid = src_pend->vif->domid;
+ copy_gop->source.u.ref = src_pend->req.gref;
+ copy_gop->flags |= GNTCOPY_source_gref;
+ } else {
+ void *vaddr = page_address(page);
+ copy_gop->source.domid = DOMID_SELF;
+ copy_gop->source.u.gmfn = virt_to_mfn(vaddr);
+ }
+ copy_gop->source.offset = offset;
+ copy_gop->dest.domid = vif->domid;
+
+ copy_gop->dest.offset = npo->copy_off;
+ copy_gop->dest.u.ref = npo->copy_gref;
+ copy_gop->len = bytes;
+
+ npo->copy_off += bytes;
+ meta->size += bytes;
+
+ offset += bytes;
+ size -= bytes;
+
+ /* Leave a gap for the GSO descriptor. */
+ if (*head && skb_shinfo(skb)->gso_size && !vif->gso_prefix)
+ vif->rx.req_cons++;
+
+ *head = 0; /* There must be something in this buffer now. */
+
+ }
+}
+
+/*
+ * Prepare an SKB to be transmitted to the frontend.
+ *
+ * This function is responsible for allocating grant operations, meta
+ * structures, etc.
+ *
+ * It returns the number of meta structures consumed. The number of
+ * ring slots used is always equal to the number of meta slots used
+ * plus the number of GSO descriptors used. Currently, we use either
+ * zero GSO descriptors (for non-GSO packets) or one descriptor (for
+ * frontend-side LRO).
+ */
+static int netbk_gop_skb(struct sk_buff *skb,
+ struct netrx_pending_operations *npo)
+{
+ struct xenvif *vif = netdev_priv(skb->dev);
+ int nr_frags = skb_shinfo(skb)->nr_frags;
+ int i;
+ struct xen_netif_rx_request *req;
+ struct netbk_rx_meta *meta;
+ unsigned char *data;
+ int head = 1;
+ int old_meta_prod;
+
+ old_meta_prod = npo->meta_prod;
+
+ /* Set up a GSO prefix descriptor, if necessary */
+ if (skb_shinfo(skb)->gso_size && vif->gso_prefix) {
+ req = RING_GET_REQUEST(&vif->rx, vif->rx.req_cons++);
+ meta = npo->meta + npo->meta_prod++;
+ meta->gso_size = skb_shinfo(skb)->gso_size;
+ meta->size = 0;
+ meta->id = req->id;
+ }
+
+ req = RING_GET_REQUEST(&vif->rx, vif->rx.req_cons++);
+ meta = npo->meta + npo->meta_prod++;
+
+ if (!vif->gso_prefix)
+ meta->gso_size = skb_shinfo(skb)->gso_size;
+ else
+ meta->gso_size = 0;
+
+ meta->size = 0;
+ meta->id = req->id;
+ npo->copy_off = 0;
+ npo->copy_gref = req->gref;
+
+ data = skb->data;
+ while (data < skb_tail_pointer(skb)) {
+ unsigned int offset = offset_in_page(data);
+ unsigned int len = PAGE_SIZE - offset;
+
+ if (data + len > skb_tail_pointer(skb))
+ len = skb_tail_pointer(skb) - data;
+
+ netbk_gop_frag_copy(vif, skb, npo,
+ virt_to_page(data), len, offset, &head);
+ data += len;
+ }
+
+ for (i = 0; i < nr_frags; i++) {
+ netbk_gop_frag_copy(vif, skb, npo,
+ skb_shinfo(skb)->frags[i].page,
+ skb_shinfo(skb)->frags[i].size,
+ skb_shinfo(skb)->frags[i].page_offset,
+ &head);
+ }
+
+ return npo->meta_prod - old_meta_prod;
+}
+
+/*
+ * This is a twin to netbk_gop_skb. Assume that netbk_gop_skb was
+ * used to set up the operations on the top of
+ * netrx_pending_operations, which have since been done. Check that
+ * they didn't give any errors and advance over them.
+ */
+static int netbk_check_gop(struct xenvif *vif, int nr_meta_slots,
+ struct netrx_pending_operations *npo)
+{
+ struct gnttab_copy *copy_op;
+ int status = XEN_NETIF_RSP_OKAY;
+ int i;
+
+ for (i = 0; i < nr_meta_slots; i++) {
+ copy_op = npo->copy + npo->copy_cons++;
+ if (copy_op->status != GNTST_okay) {
+ netdev_dbg(vif->dev,
+ "Bad status %d from copy to DOM%d.\n",
+ copy_op->status, vif->domid);
+ status = XEN_NETIF_RSP_ERROR;
+ }
+ }
+
+ return status;
+}
+
+static void netbk_add_frag_responses(struct xenvif *vif, int status,
+ struct netbk_rx_meta *meta,
+ int nr_meta_slots)
+{
+ int i;
+ unsigned long offset;
+
+ /* No fragments used */
+ if (nr_meta_slots <= 1)
+ return;
+
+ nr_meta_slots--;
+
+ for (i = 0; i < nr_meta_slots; i++) {
+ int flags;
+ if (i == nr_meta_slots - 1)
+ flags = 0;
+ else
+ flags = XEN_NETRXF_more_data;
+
+ offset = 0;
+ make_rx_response(vif, meta[i].id, status, offset,
+ meta[i].size, flags);
+ }
+}
+
+struct skb_cb_overlay {
+ int meta_slots_used;
+};
+
+static void xen_netbk_rx_action(struct xen_netbk *netbk)
+{
+ struct xenvif *vif = NULL, *tmp;
+ s8 status;
+ u16 irq, flags;
+ struct xen_netif_rx_response *resp;
+ struct sk_buff_head rxq;
+ struct sk_buff *skb;
+ LIST_HEAD(notify);
+ int ret;
+ int nr_frags;
+ int count;
+ unsigned long offset;
+ struct skb_cb_overlay *sco;
+
+ struct netrx_pending_operations npo = {
+ .copy = netbk->grant_copy_op,
+ .meta = netbk->meta,
+ };
+
+ skb_queue_head_init(&rxq);
+
+ count = 0;
+
+ while ((skb = skb_dequeue(&netbk->rx_queue)) != NULL) {
+ vif = netdev_priv(skb->dev);
+ nr_frags = skb_shinfo(skb)->nr_frags;
+
+ sco = (struct skb_cb_overlay *)skb->cb;
+ sco->meta_slots_used = netbk_gop_skb(skb, &npo);
+
+ count += nr_frags + 1;
+
+ __skb_queue_tail(&rxq, skb);
+
+ /* Filled the batch queue? */
+ if (count + MAX_SKB_FRAGS >= XEN_NETIF_RX_RING_SIZE)
+ break;
+ }
+
+ BUG_ON(npo.meta_prod > ARRAY_SIZE(netbk->meta));
+
+ if (!npo.copy_prod)
+ return;
+
+ BUG_ON(npo.copy_prod > ARRAY_SIZE(netbk->grant_copy_op));
+ ret = HYPERVISOR_grant_table_op(GNTTABOP_copy, &netbk->grant_copy_op,
+ npo.copy_prod);
+ BUG_ON(ret != 0);
+
+ while ((skb = __skb_dequeue(&rxq)) != NULL) {
+ sco = (struct skb_cb_overlay *)skb->cb;
+
+ vif = netdev_priv(skb->dev);
+
+ if (netbk->meta[npo.meta_cons].gso_size && vif->gso_prefix) {
+ resp = RING_GET_RESPONSE(&vif->rx,
+ vif->rx.rsp_prod_pvt++);
+
+ resp->flags = XEN_NETRXF_gso_prefix | XEN_NETRXF_more_data;
+
+ resp->offset = netbk->meta[npo.meta_cons].gso_size;
+ resp->id = netbk->meta[npo.meta_cons].id;
+ resp->status = sco->meta_slots_used;
+
+ npo.meta_cons++;
+ sco->meta_slots_used--;
+ }
+
+
+ vif->stats.tx_bytes += skb->len;
+ vif->stats.tx_packets++;
+
+ status = netbk_check_gop(vif, sco->meta_slots_used, &npo);
+
+ if (sco->meta_slots_used == 1)
+ flags = 0;
+ else
+ flags = XEN_NETRXF_more_data;
+
+ if (skb->ip_summed == CHECKSUM_PARTIAL) /* local packet? */
+ flags |= XEN_NETRXF_csum_blank | XEN_NETRXF_data_validated;
+ else if (skb->ip_summed == CHECKSUM_UNNECESSARY)
+ /* remote but checksummed. */
+ flags |= XEN_NETRXF_data_validated;
+
+ offset = 0;
+ resp = make_rx_response(vif, netbk->meta[npo.meta_cons].id,
+ status, offset,
+ netbk->meta[npo.meta_cons].size,
+ flags);
+
+ if (netbk->meta[npo.meta_cons].gso_size && !vif->gso_prefix) {
+ struct xen_netif_extra_info *gso =
+ (struct xen_netif_extra_info *)
+ RING_GET_RESPONSE(&vif->rx,
+ vif->rx.rsp_prod_pvt++);
+
+ resp->flags |= XEN_NETRXF_extra_info;
+
+ gso->u.gso.size = netbk->meta[npo.meta_cons].gso_size;
+ gso->u.gso.type = XEN_NETIF_GSO_TYPE_TCPV4;
+ gso->u.gso.pad = 0;
+ gso->u.gso.features = 0;
+
+ gso->type = XEN_NETIF_EXTRA_TYPE_GSO;
+ gso->flags = 0;
+ }
+
+ netbk_add_frag_responses(vif, status,
+ netbk->meta + npo.meta_cons + 1,
+ sco->meta_slots_used);
+
+ RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&vif->rx, ret);
+ irq = vif->irq;
+ if (ret && list_empty(&vif->notify_list))
+ list_add_tail(&vif->notify_list, &notify);
+
+ xenvif_notify_tx_completion(vif);
+
+ xenvif_put(vif);
+ npo.meta_cons += sco->meta_slots_used;
+ dev_kfree_skb(skb);
+ }
+
+ list_for_each_entry_safe(vif, tmp, &notify, notify_list) {
+ notify_remote_via_irq(vif->irq);
+ list_del_init(&vif->notify_list);
+ }
+
+ /* More work to do? */
+ if (!skb_queue_empty(&netbk->rx_queue) &&
+ !timer_pending(&netbk->net_timer))
+ xen_netbk_kick_thread(netbk);
+}
+
+void xen_netbk_queue_tx_skb(struct xenvif *vif, struct sk_buff *skb)
+{
+ struct xen_netbk *netbk = vif->netbk;
+
+ skb_queue_tail(&netbk->rx_queue, skb);
+
+ xen_netbk_kick_thread(netbk);
+}
+
+static void xen_netbk_alarm(unsigned long data)
+{
+ struct xen_netbk *netbk = (struct xen_netbk *)data;
+ xen_netbk_kick_thread(netbk);
+}
+
+static int __on_net_schedule_list(struct xenvif *vif)
+{
+ return !list_empty(&vif->schedule_list);
+}
+
+/* Must be called with net_schedule_list_lock held */
+static void remove_from_net_schedule_list(struct xenvif *vif)
+{
+ if (likely(__on_net_schedule_list(vif))) {
+ list_del_init(&vif->schedule_list);
+ xenvif_put(vif);
+ }
+}
+
+static struct xenvif *poll_net_schedule_list(struct xen_netbk *netbk)
+{
+ struct xenvif *vif = NULL;
+
+ spin_lock_irq(&netbk->net_schedule_list_lock);
+ if (list_empty(&netbk->net_schedule_list))
+ goto out;
+
+ vif = list_first_entry(&netbk->net_schedule_list,
+ struct xenvif, schedule_list);
+ if (!vif)
+ goto out;
+
+ xenvif_get(vif);
+
+ remove_from_net_schedule_list(vif);
+out:
+ spin_unlock_irq(&netbk->net_schedule_list_lock);
+ return vif;
+}
+
+void xen_netbk_schedule_xenvif(struct xenvif *vif)
+{
+ unsigned long flags;
+ struct xen_netbk *netbk = vif->netbk;
+
+ if (__on_net_schedule_list(vif))
+ goto kick;
+
+ spin_lock_irqsave(&netbk->net_schedule_list_lock, flags);
+ if (!__on_net_schedule_list(vif) &&
+ likely(xenvif_schedulable(vif))) {
+ list_add_tail(&vif->schedule_list, &netbk->net_schedule_list);
+ xenvif_get(vif);
+ }
+ spin_unlock_irqrestore(&netbk->net_schedule_list_lock, flags);
+
+kick:
+ smp_mb();
+ if ((nr_pending_reqs(netbk) < (MAX_PENDING_REQS/2)) &&
+ !list_empty(&netbk->net_schedule_list))
+ xen_netbk_kick_thread(netbk);
+}
+
+void xen_netbk_deschedule_xenvif(struct xenvif *vif)
+{
+ struct xen_netbk *netbk = vif->netbk;
+ spin_lock_irq(&netbk->net_schedule_list_lock);
+ remove_from_net_schedule_list(vif);
+ spin_unlock_irq(&netbk->net_schedule_list_lock);
+}
+
+void xen_netbk_check_rx_xenvif(struct xenvif *vif)
+{
+ int more_to_do;
+
+ RING_FINAL_CHECK_FOR_REQUESTS(&vif->tx, more_to_do);
+
+ if (more_to_do)
+ xen_netbk_schedule_xenvif(vif);
+}
+
+static void tx_add_credit(struct xenvif *vif)
+{
+ unsigned long max_burst, max_credit;
+
+ /*
+ * Allow a burst big enough to transmit a jumbo packet of up to 128kB.
+ * Otherwise the interface can seize up due to insufficient credit.
+ */
+ max_burst = RING_GET_REQUEST(&vif->tx, vif->tx.req_cons)->size;
+ max_burst = min(max_burst, 131072UL);
+ max_burst = max(max_burst, vif->credit_bytes);
+
+ /* Take care that adding a new chunk of credit doesn't wrap to zero. */
+ max_credit = vif->remaining_credit + vif->credit_bytes;
+ if (max_credit < vif->remaining_credit)
+ max_credit = ULONG_MAX; /* wrapped: clamp to ULONG_MAX */
+
+ vif->remaining_credit = min(max_credit, max_burst);
+}
+
|
|
+static void tx_credit_callback(unsigned long data)
|
|
+{
|
|
+ struct xenvif *vif = (struct xenvif *)data;
|
|
+ tx_add_credit(vif);
|
|
+ xen_netbk_check_rx_xenvif(vif);
|
|
+}
|
|
+
|
|
+static void netbk_tx_err(struct xenvif *vif,
|
|
+ struct xen_netif_tx_request *txp, RING_IDX end)
|
|
+{
|
|
+ RING_IDX cons = vif->tx.req_cons;
|
|
+
|
|
+ do {
|
|
+ make_tx_response(vif, txp, XEN_NETIF_RSP_ERROR);
|
|
+ if (cons >= end)
|
|
+ break;
|
|
+ txp = RING_GET_REQUEST(&vif->tx, cons++);
|
|
+ } while (1);
|
|
+ vif->tx.req_cons = cons;
|
|
+ xen_netbk_check_rx_xenvif(vif);
|
|
+ xenvif_put(vif);
|
|
+}
|
|
+
|
|
+static int netbk_count_requests(struct xenvif *vif,
|
|
+ struct xen_netif_tx_request *first,
|
|
+ struct xen_netif_tx_request *txp,
|
|
+ int work_to_do)
|
|
+{
|
|
+ RING_IDX cons = vif->tx.req_cons;
|
|
+ int frags = 0;
|
|
+
|
|
+ if (!(first->flags & XEN_NETTXF_more_data))
|
|
+ return 0;
|
|
+
|
|
+ do {
|
|
+ if (frags >= work_to_do) {
|
|
+ netdev_dbg(vif->dev, "Need more frags\n");
|
|
+ return -frags;
|
|
+ }
|
|
+
|
|
+ if (unlikely(frags >= MAX_SKB_FRAGS)) {
|
|
+ netdev_dbg(vif->dev, "Too many frags\n");
|
|
+ return -frags;
|
|
+ }
|
|
+
|
|
+ memcpy(txp, RING_GET_REQUEST(&vif->tx, cons + frags),
|
|
+ sizeof(*txp));
|
|
+ if (txp->size > first->size) {
|
|
+ netdev_dbg(vif->dev, "Frags galore\n");
|
|
+ return -frags;
|
|
+ }
|
|
+
|
|
+ first->size -= txp->size;
|
|
+ frags++;
|
|
+
|
|
+ if (unlikely((txp->offset + txp->size) > PAGE_SIZE)) {
|
|
+ netdev_dbg(vif->dev, "txp->offset: %x, size: %u\n",
|
|
+ txp->offset, txp->size);
|
|
+ return -frags;
|
|
+ }
|
|
+ } while ((txp++)->flags & XEN_NETTXF_more_data);
|
|
+ return frags;
|
|
+}
|
|
+
|
|
+static struct page *xen_netbk_alloc_page(struct xen_netbk *netbk,
|
|
+ struct sk_buff *skb,
|
|
+ unsigned long pending_idx)
|
|
+{
|
|
+ struct page *page;
|
|
+ page = alloc_page(GFP_KERNEL|__GFP_COLD);
|
|
+ if (!page)
|
|
+ return NULL;
|
|
+ set_page_ext(page, netbk, pending_idx);
|
|
+ netbk->mmap_pages[pending_idx] = page;
|
|
+ return page;
|
|
+}
|
|
+
|
|
+static struct gnttab_copy *xen_netbk_get_requests(struct xen_netbk *netbk,
|
|
+ struct xenvif *vif,
|
|
+ struct sk_buff *skb,
|
|
+ struct xen_netif_tx_request *txp,
|
|
+ struct gnttab_copy *gop)
|
|
+{
|
|
+ struct skb_shared_info *shinfo = skb_shinfo(skb);
|
|
+ skb_frag_t *frags = shinfo->frags;
|
|
+ unsigned long pending_idx = *((u16 *)skb->data);
|
|
+ int i, start;
|
|
+
|
|
+ /* Skip first skb fragment if it is on same page as header fragment. */
|
|
+ start = ((unsigned long)shinfo->frags[0].page == pending_idx);
|
|
+
|
|
+ for (i = start; i < shinfo->nr_frags; i++, txp++) {
|
|
+ struct page *page;
|
|
+ pending_ring_idx_t index;
|
|
+ struct pending_tx_info *pending_tx_info =
|
|
+ netbk->pending_tx_info;
|
|
+
|
|
+ index = pending_index(netbk->pending_cons++);
|
|
+ pending_idx = netbk->pending_ring[index];
|
|
+ page = xen_netbk_alloc_page(netbk, skb, pending_idx);
|
|
+ if (!page)
|
|
+ return NULL;
|
|
+
|
|
+ netbk->mmap_pages[pending_idx] = page;
|
|
+
|
|
+ gop->source.u.ref = txp->gref;
|
|
+ gop->source.domid = vif->domid;
|
|
+ gop->source.offset = txp->offset;
|
|
+
|
|
+ gop->dest.u.gmfn = virt_to_mfn(page_address(page));
|
|
+ gop->dest.domid = DOMID_SELF;
|
|
+ gop->dest.offset = txp->offset;
|
|
+
|
|
+ gop->len = txp->size;
|
|
+ gop->flags = GNTCOPY_source_gref;
|
|
+
|
|
+ gop++;
|
|
+
|
|
+ memcpy(&pending_tx_info[pending_idx].req, txp, sizeof(*txp));
|
|
+ xenvif_get(vif);
|
|
+ pending_tx_info[pending_idx].vif = vif;
|
|
+ frags[i].page = (void *)pending_idx;
|
|
+ }
|
|
+
|
|
+ return gop;
|
|
+}
|
|
+
|
|
+static int xen_netbk_tx_check_gop(struct xen_netbk *netbk,
|
|
+ struct sk_buff *skb,
|
|
+ struct gnttab_copy **gopp)
|
|
+{
|
|
+ struct gnttab_copy *gop = *gopp;
|
|
+ int pending_idx = *((u16 *)skb->data);
|
|
+ struct pending_tx_info *pending_tx_info = netbk->pending_tx_info;
|
|
+ struct xenvif *vif = pending_tx_info[pending_idx].vif;
|
|
+ struct xen_netif_tx_request *txp;
|
|
+ struct skb_shared_info *shinfo = skb_shinfo(skb);
|
|
+ int nr_frags = shinfo->nr_frags;
|
|
+ int i, err, start;
|
|
+
|
|
+ /* Check status of header. */
|
|
+ err = gop->status;
|
|
+ if (unlikely(err)) {
|
|
+ pending_ring_idx_t index;
|
|
+ index = pending_index(netbk->pending_prod++);
|
|
+ txp = &pending_tx_info[pending_idx].req;
|
|
+ make_tx_response(vif, txp, XEN_NETIF_RSP_ERROR);
|
|
+ netbk->pending_ring[index] = pending_idx;
|
|
+ xenvif_put(vif);
|
|
+ }
|
|
+
|
|
+ /* Skip first skb fragment if it is on same page as header fragment. */
|
|
+ start = ((unsigned long)shinfo->frags[0].page == pending_idx);
|
|
+
|
|
+ for (i = start; i < nr_frags; i++) {
|
|
+ int j, newerr;
|
|
+ pending_ring_idx_t index;
|
|
+
|
|
+ pending_idx = (unsigned long)shinfo->frags[i].page;
|
|
+
|
|
+ /* Check error status: if okay then remember grant handle. */
|
|
+ newerr = (++gop)->status;
|
|
+ if (likely(!newerr)) {
|
|
+ /* Had a previous error? Invalidate this fragment. */
|
|
+ if (unlikely(err))
|
|
+ xen_netbk_idx_release(netbk, pending_idx);
|
|
+ continue;
|
|
+ }
|
|
+
|
|
+ /* Error on this fragment: respond to client with an error. */
|
|
+ txp = &netbk->pending_tx_info[pending_idx].req;
|
|
+ make_tx_response(vif, txp, XEN_NETIF_RSP_ERROR);
|
|
+ index = pending_index(netbk->pending_prod++);
|
|
+ netbk->pending_ring[index] = pending_idx;
|
|
+ xenvif_put(vif);
|
|
+
|
|
+ /* Not the first error? Preceding frags already invalidated. */
|
|
+ if (err)
|
|
+ continue;
|
|
+
|
|
+ /* First error: invalidate header and preceding fragments. */
|
|
+ pending_idx = *((u16 *)skb->data);
|
|
+ xen_netbk_idx_release(netbk, pending_idx);
|
|
+		for (j = start; j < i; j++) {
+			pending_idx = (unsigned long)shinfo->frags[j].page;
+			xen_netbk_idx_release(netbk, pending_idx);
+		}
+
+		/* Remember the error: invalidate all subsequent fragments. */
+		err = newerr;
+	}
+
+	*gopp = gop + 1;
+	return err;
+}
+
+static void xen_netbk_fill_frags(struct xen_netbk *netbk, struct sk_buff *skb)
+{
+	struct skb_shared_info *shinfo = skb_shinfo(skb);
+	int nr_frags = shinfo->nr_frags;
+	int i;
+
+	for (i = 0; i < nr_frags; i++) {
+		skb_frag_t *frag = shinfo->frags + i;
+		struct xen_netif_tx_request *txp;
+		unsigned long pending_idx;
+
+		pending_idx = (unsigned long)frag->page;
+
+		txp = &netbk->pending_tx_info[pending_idx].req;
+		frag->page = virt_to_page(idx_to_kaddr(netbk, pending_idx));
+		frag->size = txp->size;
+		frag->page_offset = txp->offset;
+
+		skb->len += txp->size;
+		skb->data_len += txp->size;
+		skb->truesize += txp->size;
+
+		/* Take an extra reference to offset xen_netbk_idx_release */
+		get_page(netbk->mmap_pages[pending_idx]);
+		xen_netbk_idx_release(netbk, pending_idx);
+	}
+}
+
+static int xen_netbk_get_extras(struct xenvif *vif,
+				struct xen_netif_extra_info *extras,
+				int work_to_do)
+{
+	struct xen_netif_extra_info extra;
+	RING_IDX cons = vif->tx.req_cons;
+
+	do {
+		if (unlikely(work_to_do-- <= 0)) {
+			netdev_dbg(vif->dev, "Missing extra info\n");
+			return -EBADR;
+		}
+
+		memcpy(&extra, RING_GET_REQUEST(&vif->tx, cons),
+		       sizeof(extra));
+		if (unlikely(!extra.type ||
+			     extra.type >= XEN_NETIF_EXTRA_TYPE_MAX)) {
+			vif->tx.req_cons = ++cons;
+			netdev_dbg(vif->dev,
+				   "Invalid extra type: %d\n", extra.type);
+			return -EINVAL;
+		}
+
+		memcpy(&extras[extra.type - 1], &extra, sizeof(extra));
+		vif->tx.req_cons = ++cons;
+	} while (extra.flags & XEN_NETIF_EXTRA_FLAG_MORE);
+
+	return work_to_do;
+}
+
+static int netbk_set_skb_gso(struct xenvif *vif,
+			     struct sk_buff *skb,
+			     struct xen_netif_extra_info *gso)
+{
+	if (!gso->u.gso.size) {
+		netdev_dbg(vif->dev, "GSO size must not be zero.\n");
+		return -EINVAL;
+	}
+
+	/* Currently only TCPv4 S.O. is supported. */
+	if (gso->u.gso.type != XEN_NETIF_GSO_TYPE_TCPV4) {
+		netdev_dbg(vif->dev, "Bad GSO type %d.\n", gso->u.gso.type);
+		return -EINVAL;
+	}
+
+	skb_shinfo(skb)->gso_size = gso->u.gso.size;
+	skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
+
+	/* Header must be checked, and gso_segs computed. */
+	skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
+	skb_shinfo(skb)->gso_segs = 0;
+
+	return 0;
+}
+
+static int checksum_setup(struct xenvif *vif, struct sk_buff *skb)
+{
+	struct iphdr *iph;
+	unsigned char *th;
+	int err = -EPROTO;
+	int recalculate_partial_csum = 0;
+
+	/*
+	 * A GSO SKB must be CHECKSUM_PARTIAL. However some buggy
+	 * peers can fail to set NETRXF_csum_blank when sending a GSO
+	 * frame. In this case force the SKB to CHECKSUM_PARTIAL and
+	 * recalculate the partial checksum.
+	 */
+	if (skb->ip_summed != CHECKSUM_PARTIAL && skb_is_gso(skb)) {
+		vif->rx_gso_checksum_fixup++;
+		skb->ip_summed = CHECKSUM_PARTIAL;
+		recalculate_partial_csum = 1;
+	}
+
+	/* A non-CHECKSUM_PARTIAL SKB does not require setup. */
+	if (skb->ip_summed != CHECKSUM_PARTIAL)
+		return 0;
+
+	if (skb->protocol != htons(ETH_P_IP))
+		goto out;
+
+	iph = (void *)skb->data;
+	th = skb->data + 4 * iph->ihl;
+	if (th >= skb_tail_pointer(skb))
+		goto out;
+
+	skb->csum_start = th - skb->head;
+	switch (iph->protocol) {
+	case IPPROTO_TCP:
+		skb->csum_offset = offsetof(struct tcphdr, check);
+
+		if (recalculate_partial_csum) {
+			struct tcphdr *tcph = (struct tcphdr *)th;
+			tcph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr,
+							 skb->len - iph->ihl*4,
+							 IPPROTO_TCP, 0);
+		}
+		break;
+	case IPPROTO_UDP:
+		skb->csum_offset = offsetof(struct udphdr, check);
+
+		if (recalculate_partial_csum) {
+			struct udphdr *udph = (struct udphdr *)th;
+			udph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr,
+							 skb->len - iph->ihl*4,
+							 IPPROTO_UDP, 0);
+		}
+		break;
+	default:
+		if (net_ratelimit())
+			netdev_err(vif->dev,
+				   "Attempting to checksum a non-TCP/UDP packet, dropping a protocol %d packet\n",
+				   iph->protocol);
+		goto out;
+	}
+
+	if ((th + skb->csum_offset + 2) > skb_tail_pointer(skb))
+		goto out;
+
+	err = 0;
+
+out:
+	return err;
+}
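
checksum_setup() relies on the CHECKSUM_PARTIAL contract: th->check must already hold the complement of the TCP/UDP pseudo-header sum, so that whoever completes the checksum only has to add the payload and fold. The sketch below is an illustration only, written with plain host-order arithmetic rather than the kernel's network-order csum_tcpudp_magic(), so the exact bytes it prints differ from the in-kernel value; the structure (sum 16-bit halves, fold the carries, complement) is the point.

#include <stdint.h>
#include <stdio.h>
#include <arpa/inet.h>

/* 16-bit one's-complement sum of an IPv4 pseudo-header, folded. */
static uint16_t pseudo_hdr_csum(uint32_t saddr, uint32_t daddr,
				uint8_t proto, uint16_t len)
{
	uint32_t sum = 0;

	sum += (saddr >> 16) & 0xffff;
	sum += saddr & 0xffff;
	sum += (daddr >> 16) & 0xffff;
	sum += daddr & 0xffff;
	sum += proto;		/* zero byte + protocol field */
	sum += len;		/* TCP/UDP length */

	while (sum >> 16)	/* fold carries back in */
		sum = (sum & 0xffff) + (sum >> 16);
	return (uint16_t)sum;
}

int main(void)
{
	uint32_t s = ntohl(inet_addr("192.168.0.1"));
	uint32_t d = ntohl(inet_addr("192.168.0.2"));

	/* The driver stores the complement (~) in th->check. */
	printf("0x%04x\n", (uint16_t)~pseudo_hdr_csum(s, d, 6, 40));
	return 0;
}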
+
+static bool tx_credit_exceeded(struct xenvif *vif, unsigned size)
+{
+	unsigned long now = jiffies;
+	unsigned long next_credit =
+		vif->credit_timeout.expires +
+		msecs_to_jiffies(vif->credit_usec / 1000);
+
+	/* Timer could already be pending in rare cases. */
+	if (timer_pending(&vif->credit_timeout))
+		return true;
+
+	/* Passed the point where we can replenish credit? */
+	if (time_after_eq(now, next_credit)) {
+		vif->credit_timeout.expires = now;
+		tx_add_credit(vif);
+	}
+
+	/* Still too big to send right now? Set a callback. */
+	if (size > vif->remaining_credit) {
+		vif->credit_timeout.data =
+			(unsigned long)vif;
+		vif->credit_timeout.function =
+			tx_credit_callback;
+		mod_timer(&vif->credit_timeout,
+			  next_credit);
+
+		return true;
+	}
+
+	return false;
+}
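
tx_add_credit() and tx_credit_exceeded() together implement the per-vif rate limit: credit_bytes of credit per credit_usec window, with a burst allowance large enough for the next request (capped at 128kB) and a clamp so replenishment cannot wrap to zero. A standalone model of that arithmetic, using hypothetical names (vif_credit, add_credit) and assuming only what the two functions above show:

#include <stdio.h>
#include <limits.h>

struct vif_credit {
	unsigned long credit_bytes;	/* bytes allowed per window */
	unsigned long remaining;	/* current balance */
};

static void add_credit(struct vif_credit *c, unsigned long next_req_size)
{
	/* Burst big enough for the next packet, capped at 128kB. */
	unsigned long max_burst = next_req_size;
	if (max_burst > 131072UL)
		max_burst = 131072UL;
	if (max_burst < c->credit_bytes)
		max_burst = c->credit_bytes;

	/* Replenish, clamping instead of wrapping to zero. */
	unsigned long max_credit = c->remaining + c->credit_bytes;
	if (max_credit < c->remaining)	/* unsigned overflow */
		max_credit = ULONG_MAX;

	c->remaining = max_credit < max_burst ? max_credit : max_burst;
}

int main(void)
{
	struct vif_credit c = { .credit_bytes = 10000, .remaining = 4000 };

	add_credit(&c, 1500);		/* 4000 + 10000, capped at 10000 */
	printf("%lu\n", c.remaining);	/* prints 10000 */
	return 0;
}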
+
+static unsigned xen_netbk_tx_build_gops(struct xen_netbk *netbk)
+{
+	struct gnttab_copy *gop = netbk->tx_copy_ops, *request_gop;
+	struct sk_buff *skb;
+	int ret;
+
+	while (((nr_pending_reqs(netbk) + MAX_SKB_FRAGS) < MAX_PENDING_REQS) &&
+	       !list_empty(&netbk->net_schedule_list)) {
+		struct xenvif *vif;
+		struct xen_netif_tx_request txreq;
+		struct xen_netif_tx_request txfrags[MAX_SKB_FRAGS];
+		struct page *page;
+		struct xen_netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX-1];
+		u16 pending_idx;
+		RING_IDX idx;
+		int work_to_do;
+		unsigned int data_len;
+		pending_ring_idx_t index;
+
+		/* Get a netif from the list with work to do. */
+		vif = poll_net_schedule_list(netbk);
+		if (!vif)
+			continue;
+
+		RING_FINAL_CHECK_FOR_REQUESTS(&vif->tx, work_to_do);
+		if (!work_to_do) {
+			xenvif_put(vif);
+			continue;
+		}
+
+		idx = vif->tx.req_cons;
+		rmb(); /* Ensure that we see the request before we copy it. */
+		memcpy(&txreq, RING_GET_REQUEST(&vif->tx, idx), sizeof(txreq));
+
+		/* Credit-based scheduling. */
+		if (txreq.size > vif->remaining_credit &&
+		    tx_credit_exceeded(vif, txreq.size)) {
+			xenvif_put(vif);
+			continue;
+		}
+
+		vif->remaining_credit -= txreq.size;
+
+		work_to_do--;
+		vif->tx.req_cons = ++idx;
+
+		memset(extras, 0, sizeof(extras));
+		if (txreq.flags & XEN_NETTXF_extra_info) {
+			work_to_do = xen_netbk_get_extras(vif, extras,
+							  work_to_do);
+			idx = vif->tx.req_cons;
+			if (unlikely(work_to_do < 0)) {
+				netbk_tx_err(vif, &txreq, idx);
+				continue;
+			}
+		}
+
+		ret = netbk_count_requests(vif, &txreq, txfrags, work_to_do);
+		if (unlikely(ret < 0)) {
+			netbk_tx_err(vif, &txreq, idx - ret);
+			continue;
+		}
+		idx += ret;
+
+		if (unlikely(txreq.size < ETH_HLEN)) {
+			netdev_dbg(vif->dev,
+				   "Bad packet size: %d\n", txreq.size);
+			netbk_tx_err(vif, &txreq, idx);
+			continue;
+		}
+
+		/* No crossing a page as the payload mustn't fragment. */
+		if (unlikely((txreq.offset + txreq.size) > PAGE_SIZE)) {
+			netdev_dbg(vif->dev,
+				   "txreq.offset: %x, size: %u, end: %lu\n",
+				   txreq.offset, txreq.size,
+				   (txreq.offset&~PAGE_MASK) + txreq.size);
+			netbk_tx_err(vif, &txreq, idx);
+			continue;
+		}
+
+		index = pending_index(netbk->pending_cons);
+		pending_idx = netbk->pending_ring[index];
+
+		data_len = (txreq.size > PKT_PROT_LEN &&
+			    ret < MAX_SKB_FRAGS) ?
+			PKT_PROT_LEN : txreq.size;
+
+		skb = alloc_skb(data_len + NET_SKB_PAD + NET_IP_ALIGN,
+				GFP_ATOMIC | __GFP_NOWARN);
+		if (unlikely(skb == NULL)) {
+			netdev_dbg(vif->dev,
+				   "Can't allocate a skb in start_xmit.\n");
+			netbk_tx_err(vif, &txreq, idx);
+			break;
+		}
+
+		/* Packets passed to netif_rx() must have some headroom. */
+		skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN);
+
+		if (extras[XEN_NETIF_EXTRA_TYPE_GSO - 1].type) {
+			struct xen_netif_extra_info *gso;
+			gso = &extras[XEN_NETIF_EXTRA_TYPE_GSO - 1];
+
+			if (netbk_set_skb_gso(vif, skb, gso)) {
+				kfree_skb(skb);
+				netbk_tx_err(vif, &txreq, idx);
+				continue;
+			}
+		}
+
+		/* XXX could copy straight to head */
+		page = xen_netbk_alloc_page(netbk, skb, pending_idx);
+		if (!page) {
+			kfree_skb(skb);
+			netbk_tx_err(vif, &txreq, idx);
+			continue;
+		}
+
+		netbk->mmap_pages[pending_idx] = page;
+
+		gop->source.u.ref = txreq.gref;
+		gop->source.domid = vif->domid;
+		gop->source.offset = txreq.offset;
+
+		gop->dest.u.gmfn = virt_to_mfn(page_address(page));
+		gop->dest.domid = DOMID_SELF;
+		gop->dest.offset = txreq.offset;
+
+		gop->len = txreq.size;
+		gop->flags = GNTCOPY_source_gref;
+
+		gop++;
+
+		memcpy(&netbk->pending_tx_info[pending_idx].req,
+		       &txreq, sizeof(txreq));
+		netbk->pending_tx_info[pending_idx].vif = vif;
+		*((u16 *)skb->data) = pending_idx;
+
+		__skb_put(skb, data_len);
+
+		skb_shinfo(skb)->nr_frags = ret;
+		if (data_len < txreq.size) {
+			skb_shinfo(skb)->nr_frags++;
+			skb_shinfo(skb)->frags[0].page =
+				(void *)(unsigned long)pending_idx;
+		} else {
+			/* Discriminate from any valid pending_idx value. */
+			skb_shinfo(skb)->frags[0].page = (void *)~0UL;
+		}
+
+		__skb_queue_tail(&netbk->tx_queue, skb);
+
+		netbk->pending_cons++;
+
+		request_gop = xen_netbk_get_requests(netbk, vif,
+						     skb, txfrags, gop);
+		if (request_gop == NULL) {
+			kfree_skb(skb);
+			netbk_tx_err(vif, &txreq, idx);
+			continue;
+		}
+		gop = request_gop;
+
+		vif->tx.req_cons = idx;
+		xen_netbk_check_rx_xenvif(vif);
+
+		if ((gop-netbk->tx_copy_ops) >= ARRAY_SIZE(netbk->tx_copy_ops))
+			break;
+	}
+
+	return gop - netbk->tx_copy_ops;
+}
+
+static void xen_netbk_tx_submit(struct xen_netbk *netbk)
+{
+	struct gnttab_copy *gop = netbk->tx_copy_ops;
+	struct sk_buff *skb;
+
+	while ((skb = __skb_dequeue(&netbk->tx_queue)) != NULL) {
+		struct xen_netif_tx_request *txp;
+		struct xenvif *vif;
+		u16 pending_idx;
+		unsigned data_len;
+
+		pending_idx = *((u16 *)skb->data);
+		vif = netbk->pending_tx_info[pending_idx].vif;
+		txp = &netbk->pending_tx_info[pending_idx].req;
+
+		/* Check the remap error code. */
+		if (unlikely(xen_netbk_tx_check_gop(netbk, skb, &gop))) {
+			netdev_dbg(vif->dev, "netback grant failed.\n");
+			skb_shinfo(skb)->nr_frags = 0;
+			kfree_skb(skb);
+			continue;
+		}
+
+		data_len = skb->len;
+		memcpy(skb->data,
+		       (void *)(idx_to_kaddr(netbk, pending_idx)|txp->offset),
+		       data_len);
+		if (data_len < txp->size) {
+			/* Append the packet payload as a fragment. */
+			txp->offset += data_len;
+			txp->size -= data_len;
+		} else {
+			/* Schedule a response immediately. */
+			xen_netbk_idx_release(netbk, pending_idx);
+		}
+
+		if (txp->flags & XEN_NETTXF_csum_blank)
+			skb->ip_summed = CHECKSUM_PARTIAL;
+		else if (txp->flags & XEN_NETTXF_data_validated)
+			skb->ip_summed = CHECKSUM_UNNECESSARY;
+
+		xen_netbk_fill_frags(netbk, skb);
+
+		/*
+		 * If the initial fragment was < PKT_PROT_LEN then
+		 * pull through some bytes from the other fragments to
+		 * increase the linear region to PKT_PROT_LEN bytes.
+		 */
+		if (skb_headlen(skb) < PKT_PROT_LEN && skb_is_nonlinear(skb)) {
+			int target = min_t(int, skb->len, PKT_PROT_LEN);
+			__pskb_pull_tail(skb, target - skb_headlen(skb));
+		}
+
+		skb->dev = vif->dev;
+		skb->protocol = eth_type_trans(skb, skb->dev);
+
+		if (checksum_setup(vif, skb)) {
+			netdev_dbg(vif->dev,
+				   "Can't setup checksum in net_tx_action\n");
+			kfree_skb(skb);
+			continue;
+		}
+
+		vif->stats.rx_bytes += skb->len;
+		vif->stats.rx_packets++;
+
+		xenvif_receive_skb(vif, skb);
+	}
+}
+
+/* Called after netfront has transmitted */
+static void xen_netbk_tx_action(struct xen_netbk *netbk)
+{
+	unsigned nr_gops;
+	int ret;
+
+	nr_gops = xen_netbk_tx_build_gops(netbk);
+
+	if (nr_gops == 0)
+		return;
+	ret = HYPERVISOR_grant_table_op(GNTTABOP_copy,
+					netbk->tx_copy_ops, nr_gops);
+	BUG_ON(ret);
+
+	xen_netbk_tx_submit(netbk);
+
+}
+
+static void xen_netbk_idx_release(struct xen_netbk *netbk, u16 pending_idx)
+{
+	struct xenvif *vif;
+	struct pending_tx_info *pending_tx_info;
+	pending_ring_idx_t index;
+
+	/* Already complete? */
+	if (netbk->mmap_pages[pending_idx] == NULL)
+		return;
+
+	pending_tx_info = &netbk->pending_tx_info[pending_idx];
+
+	vif = pending_tx_info->vif;
+
+	make_tx_response(vif, &pending_tx_info->req, XEN_NETIF_RSP_OKAY);
+
+	index = pending_index(netbk->pending_prod++);
+	netbk->pending_ring[index] = pending_idx;
+
+	xenvif_put(vif);
+
+	netbk->mmap_pages[pending_idx]->mapping = 0;
+	put_page(netbk->mmap_pages[pending_idx]);
+	netbk->mmap_pages[pending_idx] = NULL;
+}
+
+static void make_tx_response(struct xenvif *vif,
+			     struct xen_netif_tx_request *txp,
+			     s8 st)
+{
+	RING_IDX i = vif->tx.rsp_prod_pvt;
+	struct xen_netif_tx_response *resp;
+	int notify;
+
+	resp = RING_GET_RESPONSE(&vif->tx, i);
+	resp->id = txp->id;
+	resp->status = st;
+
+	if (txp->flags & XEN_NETTXF_extra_info)
+		RING_GET_RESPONSE(&vif->tx, ++i)->status = XEN_NETIF_RSP_NULL;
+
+	vif->tx.rsp_prod_pvt = ++i;
+	RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&vif->tx, notify);
+	if (notify)
+		notify_remote_via_irq(vif->irq);
+}
+
+static struct xen_netif_rx_response *make_rx_response(struct xenvif *vif,
+						      u16 id,
+						      s8 st,
+						      u16 offset,
+						      u16 size,
+						      u16 flags)
+{
+	RING_IDX i = vif->rx.rsp_prod_pvt;
+	struct xen_netif_rx_response *resp;
+
+	resp = RING_GET_RESPONSE(&vif->rx, i);
+	resp->offset = offset;
+	resp->flags = flags;
+	resp->id = id;
+	resp->status = (s16)size;
+	if (st < 0)
+		resp->status = (s16)st;
+
+	vif->rx.rsp_prod_pvt = ++i;
+
+	return resp;
+}
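
Both response helpers publish through RING_PUSH_RESPONSES_AND_CHECK_NOTIFY and raise the event-channel irq only when the macro says so: the other end advertises, in rsp_event, the index of the next response it wants an event for. The sketch below paraphrases that macro's window test from memory of xen/interface/io/ring.h (memory barriers omitted; consult the header for the authoritative version). Unsigned wraparound makes the half-open window test correct even when the indices overflow.

typedef unsigned int RING_IDX;

static int push_responses_and_check_notify(RING_IDX *shared_rsp_prod,
					   RING_IDX rsp_event,
					   RING_IDX rsp_prod_pvt)
{
	RING_IDX old = *shared_rsp_prod;
	RING_IDX new = rsp_prod_pvt;

	*shared_rsp_prod = new;	/* publish; real code issues wmb() first */

	/* Notify iff rsp_event lies in the window (old, new]. */
	return (RING_IDX)(new - rsp_event) < (RING_IDX)(new - old);
}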
+
+static inline int rx_work_todo(struct xen_netbk *netbk)
+{
+	return !skb_queue_empty(&netbk->rx_queue);
+}
+
+static inline int tx_work_todo(struct xen_netbk *netbk)
+{
+
+	if (((nr_pending_reqs(netbk) + MAX_SKB_FRAGS) < MAX_PENDING_REQS) &&
+	    !list_empty(&netbk->net_schedule_list))
+		return 1;
+
+	return 0;
+}
+
+static int xen_netbk_kthread(void *data)
+{
+	struct xen_netbk *netbk = data;
+	while (!kthread_should_stop()) {
+		wait_event_interruptible(netbk->wq,
+					 rx_work_todo(netbk) ||
+					 tx_work_todo(netbk) ||
+					 kthread_should_stop());
+		cond_resched();
+
+		if (kthread_should_stop())
+			break;
+
+		if (rx_work_todo(netbk))
+			xen_netbk_rx_action(netbk);
+
+		if (tx_work_todo(netbk))
+			xen_netbk_tx_action(netbk);
+	}
+
+	return 0;
+}
+
+void xen_netbk_unmap_frontend_rings(struct xenvif *vif)
+{
+	struct gnttab_unmap_grant_ref op;
+
+	if (vif->tx.sring) {
+		gnttab_set_unmap_op(&op, (unsigned long)vif->tx_comms_area->addr,
+				    GNTMAP_host_map, vif->tx_shmem_handle);
+
+		if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1))
+			BUG();
+	}
+
+	if (vif->rx.sring) {
+		gnttab_set_unmap_op(&op, (unsigned long)vif->rx_comms_area->addr,
+				    GNTMAP_host_map, vif->rx_shmem_handle);
+
+		if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1))
+			BUG();
+	}
+	if (vif->rx_comms_area)
+		free_vm_area(vif->rx_comms_area);
+	if (vif->tx_comms_area)
+		free_vm_area(vif->tx_comms_area);
+}
+
+int xen_netbk_map_frontend_rings(struct xenvif *vif,
+				 grant_ref_t tx_ring_ref,
+				 grant_ref_t rx_ring_ref)
+{
+	struct gnttab_map_grant_ref op;
+	struct xen_netif_tx_sring *txs;
+	struct xen_netif_rx_sring *rxs;
+
+	int err = -ENOMEM;
+
+	vif->tx_comms_area = alloc_vm_area(PAGE_SIZE);
+	if (vif->tx_comms_area == NULL)
+		goto err;
+
+	vif->rx_comms_area = alloc_vm_area(PAGE_SIZE);
+	if (vif->rx_comms_area == NULL)
+		goto err;
+
+	gnttab_set_map_op(&op, (unsigned long)vif->tx_comms_area->addr,
+			  GNTMAP_host_map, tx_ring_ref, vif->domid);
+
+	if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1))
+		BUG();
+
+	if (op.status) {
+		netdev_warn(vif->dev,
+			    "failed to map tx ring. err=%d status=%d\n",
+			    err, op.status);
+		err = op.status;
+		goto err;
+	}
+
+	vif->tx_shmem_ref = tx_ring_ref;
+	vif->tx_shmem_handle = op.handle;
+
+	txs = (struct xen_netif_tx_sring *)vif->tx_comms_area->addr;
+	BACK_RING_INIT(&vif->tx, txs, PAGE_SIZE);
+
+	gnttab_set_map_op(&op, (unsigned long)vif->rx_comms_area->addr,
+			  GNTMAP_host_map, rx_ring_ref, vif->domid);
+
+	if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1))
+		BUG();
+
+	if (op.status) {
+		netdev_warn(vif->dev,
+			    "failed to map rx ring. err=%d status=%d\n",
+			    err, op.status);
+		err = op.status;
+		goto err;
+	}
+
+	vif->rx_shmem_ref = rx_ring_ref;
+	vif->rx_shmem_handle = op.handle;
+	vif->rx_req_cons_peek = 0;
+
+	rxs = (struct xen_netif_rx_sring *)vif->rx_comms_area->addr;
+	BACK_RING_INIT(&vif->rx, rxs, PAGE_SIZE);
+
+	return 0;
+
+err:
+	xen_netbk_unmap_frontend_rings(vif);
+	return err;
+}
+
+static int __init netback_init(void)
+{
+	int i;
+	int rc = 0;
+	int group;
+
+	if (!xen_pv_domain())
+		return -ENODEV;
+
+	xen_netbk_group_nr = num_online_cpus();
+	xen_netbk = vzalloc(sizeof(struct xen_netbk) * xen_netbk_group_nr);
+	if (!xen_netbk) {
+		printk(KERN_ALERT "%s: out of memory\n", __func__);
+		return -ENOMEM;
+	}
+
+	for (group = 0; group < xen_netbk_group_nr; group++) {
+		struct xen_netbk *netbk = &xen_netbk[group];
+		skb_queue_head_init(&netbk->rx_queue);
+		skb_queue_head_init(&netbk->tx_queue);
+
+		init_timer(&netbk->net_timer);
+		netbk->net_timer.data = (unsigned long)netbk;
+		netbk->net_timer.function = xen_netbk_alarm;
+
+		netbk->pending_cons = 0;
+		netbk->pending_prod = MAX_PENDING_REQS;
+		for (i = 0; i < MAX_PENDING_REQS; i++)
+			netbk->pending_ring[i] = i;
+
+		init_waitqueue_head(&netbk->wq);
+		netbk->task = kthread_create(xen_netbk_kthread,
+					     (void *)netbk,
+					     "netback/%u", group);
+
+		if (IS_ERR(netbk->task)) {
+			printk(KERN_ALERT "kthread_run() fails at netback\n");
+			del_timer(&netbk->net_timer);
+			rc = PTR_ERR(netbk->task);
+			goto failed_init;
+		}
+
+		kthread_bind(netbk->task, group);
+
+		INIT_LIST_HEAD(&netbk->net_schedule_list);
+
+		spin_lock_init(&netbk->net_schedule_list_lock);
+
+		atomic_set(&netbk->netfront_count, 0);
+
+		wake_up_process(netbk->task);
+	}
+
+	rc = xenvif_xenbus_init();
+	if (rc)
+		goto failed_init;
+
+	return 0;
+
+failed_init:
+	while (--group >= 0) {
+		struct xen_netbk *netbk = &xen_netbk[group];
+		for (i = 0; i < MAX_PENDING_REQS; i++) {
+			if (netbk->mmap_pages[i])
+				__free_page(netbk->mmap_pages[i]);
+		}
+		del_timer(&netbk->net_timer);
+		kthread_stop(netbk->task);
+	}
+	vfree(xen_netbk);
+	return rc;
+
+}
+
+module_init(netback_init);
+
+MODULE_LICENSE("Dual BSD/GPL");
diff --git a/drivers/net/xen-netback/xenbus.c b/drivers/net/xen-netback/xenbus.c
new file mode 100644
index 0000000..22b8c35
--- /dev/null
+++ b/drivers/net/xen-netback/xenbus.c
@@ -0,0 +1,490 @@
+/*
+ * Xenbus code for netif backend
+ *
+ * Copyright (C) 2005 Rusty Russell <rusty@rustcorp.com.au>
+ * Copyright (C) 2005 XenSource Ltd
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+
+#include "common.h"
+
+struct backend_info {
+	struct xenbus_device *dev;
+	struct xenvif *vif;
+	enum xenbus_state frontend_state;
+	struct xenbus_watch hotplug_status_watch;
+	int have_hotplug_status_watch:1;
+};
+
+static int connect_rings(struct backend_info *);
+static void connect(struct backend_info *);
+static void backend_create_xenvif(struct backend_info *be);
+static void unregister_hotplug_status_watch(struct backend_info *be);
+
+static int netback_remove(struct xenbus_device *dev)
+{
+	struct backend_info *be = dev_get_drvdata(&dev->dev);
+
+	unregister_hotplug_status_watch(be);
+	if (be->vif) {
+		kobject_uevent(&dev->dev.kobj, KOBJ_OFFLINE);
+		xenbus_rm(XBT_NIL, dev->nodename, "hotplug-status");
+		xenvif_disconnect(be->vif);
+		be->vif = NULL;
+	}
+	kfree(be);
+	dev_set_drvdata(&dev->dev, NULL);
+	return 0;
+}
+
+
+/**
+ * Entry point to this code when a new device is created.  Allocate the basic
+ * structures and switch to InitWait.
+ */
+static int netback_probe(struct xenbus_device *dev,
+			 const struct xenbus_device_id *id)
+{
+	const char *message;
+	struct xenbus_transaction xbt;
+	int err;
+	int sg;
+	struct backend_info *be = kzalloc(sizeof(struct backend_info),
+					  GFP_KERNEL);
+	if (!be) {
+		xenbus_dev_fatal(dev, -ENOMEM,
+				 "allocating backend structure");
+		return -ENOMEM;
+	}
+
+	be->dev = dev;
+	dev_set_drvdata(&dev->dev, be);
+
+	sg = 1;
+
+	do {
+		err = xenbus_transaction_start(&xbt);
+		if (err) {
+			xenbus_dev_fatal(dev, err, "starting transaction");
+			goto fail;
+		}
+
+		err = xenbus_printf(xbt, dev->nodename, "feature-sg", "%d", sg);
+		if (err) {
+			message = "writing feature-sg";
+			goto abort_transaction;
+		}
+
+		err = xenbus_printf(xbt, dev->nodename, "feature-gso-tcpv4",
+				    "%d", sg);
+		if (err) {
+			message = "writing feature-gso-tcpv4";
+			goto abort_transaction;
+		}
+
+		/* We support rx-copy path. */
+		err = xenbus_printf(xbt, dev->nodename,
+				    "feature-rx-copy", "%d", 1);
+		if (err) {
+			message = "writing feature-rx-copy";
+			goto abort_transaction;
+		}
+
+		/*
+		 * We don't support rx-flip path (except old guests who don't
+		 * grok this feature flag).
+		 */
+		err = xenbus_printf(xbt, dev->nodename,
+				    "feature-rx-flip", "%d", 0);
+		if (err) {
+			message = "writing feature-rx-flip";
+			goto abort_transaction;
+		}
+
+		err = xenbus_transaction_end(xbt, 0);
+	} while (err == -EAGAIN);
+
+	if (err) {
+		xenbus_dev_fatal(dev, err, "completing transaction");
+		goto fail;
+	}
+
+	err = xenbus_switch_state(dev, XenbusStateInitWait);
+	if (err)
+		goto fail;
+
+	/* This kicks hotplug scripts, so do it immediately. */
+	backend_create_xenvif(be);
+
+	return 0;
+
+abort_transaction:
+	xenbus_transaction_end(xbt, 1);
+	xenbus_dev_fatal(dev, err, "%s", message);
+fail:
+	pr_debug("failed");
+	netback_remove(dev);
+	return err;
+}
+
+
+/*
+ * Handle the creation of the hotplug script environment.  We add the script
+ * and vif variables to the environment, for the benefit of the vif-* hotplug
+ * scripts.
+ */
+static int netback_uevent(struct xenbus_device *xdev,
+			  struct kobj_uevent_env *env)
+{
+	struct backend_info *be = dev_get_drvdata(&xdev->dev);
+	char *val;
+
+	val = xenbus_read(XBT_NIL, xdev->nodename, "script", NULL);
+	if (IS_ERR(val)) {
+		int err = PTR_ERR(val);
+		xenbus_dev_fatal(xdev, err, "reading script");
+		return err;
+	} else {
+		if (add_uevent_var(env, "script=%s", val)) {
+			kfree(val);
+			return -ENOMEM;
+		}
+		kfree(val);
+	}
+
+	if (!be || !be->vif)
+		return 0;
+
+	return add_uevent_var(env, "vif=%s", be->vif->dev->name);
+}
+
+
+static void backend_create_xenvif(struct backend_info *be)
+{
+	int err;
+	long handle;
+	struct xenbus_device *dev = be->dev;
+
+	if (be->vif != NULL)
+		return;
+
+	err = xenbus_scanf(XBT_NIL, dev->nodename, "handle", "%li", &handle);
+	if (err != 1) {
+		xenbus_dev_fatal(dev, err, "reading handle");
+		return;
+	}
+
+	be->vif = xenvif_alloc(&dev->dev, dev->otherend_id, handle);
+	if (IS_ERR(be->vif)) {
+		err = PTR_ERR(be->vif);
+		be->vif = NULL;
+		xenbus_dev_fatal(dev, err, "creating interface");
+		return;
+	}
+
+	kobject_uevent(&dev->dev.kobj, KOBJ_ONLINE);
+}
+
+
+static void disconnect_backend(struct xenbus_device *dev)
+{
+	struct backend_info *be = dev_get_drvdata(&dev->dev);
+
+	if (be->vif) {
+		xenbus_rm(XBT_NIL, dev->nodename, "hotplug-status");
+		xenvif_disconnect(be->vif);
+		be->vif = NULL;
+	}
+}
+
+/**
+ * Callback received when the frontend's state changes.
+ */
+static void frontend_changed(struct xenbus_device *dev,
+			     enum xenbus_state frontend_state)
+{
+	struct backend_info *be = dev_get_drvdata(&dev->dev);
+
+	pr_debug("frontend state %s", xenbus_strstate(frontend_state));
+
+	be->frontend_state = frontend_state;
+
+	switch (frontend_state) {
+	case XenbusStateInitialising:
+		if (dev->state == XenbusStateClosed) {
+			printk(KERN_INFO "%s: %s: prepare for reconnect\n",
+			       __func__, dev->nodename);
+			xenbus_switch_state(dev, XenbusStateInitWait);
+		}
+		break;
+
+	case XenbusStateInitialised:
+		break;
+
+	case XenbusStateConnected:
+		if (dev->state == XenbusStateConnected)
+			break;
+		backend_create_xenvif(be);
+		if (be->vif)
+			connect(be);
+		break;
+
+	case XenbusStateClosing:
+		if (be->vif)
+			kobject_uevent(&dev->dev.kobj, KOBJ_OFFLINE);
+		disconnect_backend(dev);
+		xenbus_switch_state(dev, XenbusStateClosing);
+		break;
+
+	case XenbusStateClosed:
+		xenbus_switch_state(dev, XenbusStateClosed);
+		if (xenbus_dev_is_online(dev))
+			break;
+		/* fall through if not online */
+	case XenbusStateUnknown:
+		device_unregister(&dev->dev);
+		break;
+
+	default:
+		xenbus_dev_fatal(dev, -EINVAL, "saw state %d at frontend",
+				 frontend_state);
+		break;
+	}
+}
+
+
+static void xen_net_read_rate(struct xenbus_device *dev,
+			      unsigned long *bytes, unsigned long *usec)
+{
+	char *s, *e;
+	unsigned long b, u;
+	char *ratestr;
+
+	/* Default to unlimited bandwidth. */
+	*bytes = ~0UL;
+	*usec = 0;
+
+	ratestr = xenbus_read(XBT_NIL, dev->nodename, "rate", NULL);
+	if (IS_ERR(ratestr))
+		return;
+
+	s = ratestr;
+	b = simple_strtoul(s, &e, 10);
+	if ((s == e) || (*e != ','))
+		goto fail;
+
+	s = e + 1;
+	u = simple_strtoul(s, &e, 10);
+	if ((s == e) || (*e != '\0'))
+		goto fail;
+
+	*bytes = b;
+	*usec = u;
+
+	kfree(ratestr);
+	return;
+
+ fail:
+	pr_warn("Failed to parse network rate limit. Traffic unlimited.\n");
+	kfree(ratestr);
+}
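
xen_net_read_rate() accepts a "<bytes>,<usec>" string from the vif's xenstore rate node; the two numbers feed the credit scheduler above as credit_bytes and credit_usec. A minimal userspace rendition of the same parse (the sample value is illustrative, not taken from the patch):

#include <stdio.h>
#include <stdlib.h>

static int parse_rate(const char *s, unsigned long *bytes,
		      unsigned long *usec)
{
	char *e;

	*bytes = strtoul(s, &e, 10);
	if (e == s || *e != ',')
		return -1;
	s = e + 1;
	*usec = strtoul(s, &e, 10);
	if (e == s || *e != '\0')
		return -1;
	return 0;
}

int main(void)
{
	unsigned long b, u;

	/* e.g. 1000000 bytes of credit per 20000 usec window */
	if (parse_rate("1000000,20000", &b, &u) == 0)
		printf("%lu bytes per %lu usec\n", b, u);
	return 0;
}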
+
+static int xen_net_read_mac(struct xenbus_device *dev, u8 mac[])
+{
+	char *s, *e, *macstr;
+	int i;
+
+	macstr = s = xenbus_read(XBT_NIL, dev->nodename, "mac", NULL);
+	if (IS_ERR(macstr))
+		return PTR_ERR(macstr);
+
+	for (i = 0; i < ETH_ALEN; i++) {
+		mac[i] = simple_strtoul(s, &e, 16);
+		if ((s == e) || (*e != ((i == ETH_ALEN-1) ? '\0' : ':'))) {
+			kfree(macstr);
+			return -ENOENT;
+		}
+		s = e+1;
+	}
+
+	kfree(macstr);
+	return 0;
+}
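
The MAC parser expects the usual colon-separated form in the xenstore mac node. An equivalent standalone sketch (ETH_ALEN redefined locally; the sample address uses the 00:16:3e prefix conventionally assigned to Xen guests):

#include <stdio.h>
#include <stdlib.h>

#define ETH_ALEN 6

static int parse_mac(const char *s, unsigned char mac[ETH_ALEN])
{
	char *e;
	int i;

	for (i = 0; i < ETH_ALEN; i++) {
		mac[i] = (unsigned char)strtoul(s, &e, 16);
		if (e == s || *e != (i == ETH_ALEN - 1 ? '\0' : ':'))
			return -1;
		s = e + 1;
	}
	return 0;
}

int main(void)
{
	unsigned char mac[ETH_ALEN];

	if (parse_mac("00:16:3e:00:00:01", mac) == 0)
		printf("%02x\n", mac[2]);	/* prints 3e */
	return 0;
}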
+
+static void unregister_hotplug_status_watch(struct backend_info *be)
+{
+	if (be->have_hotplug_status_watch) {
+		unregister_xenbus_watch(&be->hotplug_status_watch);
+		kfree(be->hotplug_status_watch.node);
+	}
+	be->have_hotplug_status_watch = 0;
+}
+
+static void hotplug_status_changed(struct xenbus_watch *watch,
+				   const char **vec,
+				   unsigned int vec_size)
+{
+	struct backend_info *be = container_of(watch,
+					       struct backend_info,
+					       hotplug_status_watch);
+	char *str;
+	unsigned int len;
+
+	str = xenbus_read(XBT_NIL, be->dev->nodename, "hotplug-status", &len);
+	if (IS_ERR(str))
+		return;
+	if (len == sizeof("connected")-1 && !memcmp(str, "connected", len)) {
+		xenbus_switch_state(be->dev, XenbusStateConnected);
+		/* Not interested in this watch anymore. */
+		unregister_hotplug_status_watch(be);
+	}
+	kfree(str);
+}
+
+static void connect(struct backend_info *be)
+{
+	int err;
+	struct xenbus_device *dev = be->dev;
+
+	err = connect_rings(be);
+	if (err)
+		return;
+
+	err = xen_net_read_mac(dev, be->vif->fe_dev_addr);
+	if (err) {
+		xenbus_dev_fatal(dev, err, "parsing %s/mac", dev->nodename);
+		return;
+	}
+
+	xen_net_read_rate(dev, &be->vif->credit_bytes,
+			  &be->vif->credit_usec);
+	be->vif->remaining_credit = be->vif->credit_bytes;
+
+	unregister_hotplug_status_watch(be);
+	err = xenbus_watch_pathfmt(dev, &be->hotplug_status_watch,
+				   hotplug_status_changed,
+				   "%s/%s", dev->nodename, "hotplug-status");
+	if (err) {
+		/* Switch now, since we can't do a watch. */
+		xenbus_switch_state(dev, XenbusStateConnected);
+	} else {
+		be->have_hotplug_status_watch = 1;
+	}
+
+	netif_wake_queue(be->vif->dev);
+}
+
+
+static int connect_rings(struct backend_info *be)
+{
+	struct xenvif *vif = be->vif;
+	struct xenbus_device *dev = be->dev;
+	unsigned long tx_ring_ref, rx_ring_ref;
+	unsigned int evtchn, rx_copy;
+	int err;
+	int val;
+
+	err = xenbus_gather(XBT_NIL, dev->otherend,
+			    "tx-ring-ref", "%lu", &tx_ring_ref,
+			    "rx-ring-ref", "%lu", &rx_ring_ref,
+			    "event-channel", "%u", &evtchn, NULL);
+	if (err) {
+		xenbus_dev_fatal(dev, err,
+				 "reading %s/ring-ref and event-channel",
+				 dev->otherend);
+		return err;
+	}
+
+	err = xenbus_scanf(XBT_NIL, dev->otherend, "request-rx-copy", "%u",
+			   &rx_copy);
+	if (err == -ENOENT) {
+		err = 0;
+		rx_copy = 0;
+	}
+	if (err < 0) {
+		xenbus_dev_fatal(dev, err, "reading %s/request-rx-copy",
+				 dev->otherend);
+		return err;
+	}
+	if (!rx_copy)
+		return -EOPNOTSUPP;
+
+	if (vif->dev->tx_queue_len != 0) {
+		if (xenbus_scanf(XBT_NIL, dev->otherend,
+				 "feature-rx-notify", "%d", &val) < 0)
+			val = 0;
+		if (val)
+			vif->can_queue = 1;
+		else
+			/* Must be non-zero for pfifo_fast to work. */
+			vif->dev->tx_queue_len = 1;
+	}
+
+	if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-sg",
+			 "%d", &val) < 0)
+		val = 0;
+	vif->can_sg = !!val;
+
+	if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-gso-tcpv4",
+			 "%d", &val) < 0)
+		val = 0;
+	vif->gso = !!val;
+
+	if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-gso-tcpv4-prefix",
+			 "%d", &val) < 0)
+		val = 0;
+	vif->gso_prefix = !!val;
+
+	if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-no-csum-offload",
+			 "%d", &val) < 0)
+		val = 0;
+	vif->csum = !val;
+
+	/* Map the shared frame, irq etc. */
+	err = xenvif_connect(vif, tx_ring_ref, rx_ring_ref, evtchn);
+	if (err) {
+		xenbus_dev_fatal(dev, err,
+				 "mapping shared-frames %lu/%lu port %u",
+				 tx_ring_ref, rx_ring_ref, evtchn);
+		return err;
+	}
+	return 0;
+}
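
Taken together, netback_probe() and connect_rings() define the xenstore handshake for a vif pair. Summarising only the nodes this file touches (paths relative to the backend's nodename, or to otherend for frontend keys):

	Written by the backend (netback_probe):
		feature-sg = 1, feature-gso-tcpv4 = 1,
		feature-rx-copy = 1, feature-rx-flip = 0
	Read from the backend's own node:
		handle, script, mac, rate ("<bytes>,<usec>"), hotplug-status
	Read from the frontend (connect_rings):
		tx-ring-ref, rx-ring-ref, event-channel, request-rx-copy,
		feature-rx-notify, feature-sg, feature-gso-tcpv4,
		feature-gso-tcpv4-prefix, feature-no-csum-offload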
+
+
+/* ** Driver Registration ** */
+
+
+static const struct xenbus_device_id netback_ids[] = {
+	{ "vif" },
+	{ "" }
+};
+
+
+static struct xenbus_driver netback = {
+	.name = "vif",
+	.owner = THIS_MODULE,
+	.ids = netback_ids,
+	.probe = netback_probe,
+	.remove = netback_remove,
+	.uevent = netback_uevent,
+	.otherend_changed = frontend_changed,
+};
+
+int xenvif_xenbus_init(void)
+{
+	return xenbus_register_backend(&netback);
+}
diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c
index da1f121..a6ab973 100644
--- a/drivers/net/xen-netfront.c
+++ b/drivers/net/xen-netfront.c
@@ -359,7 +359,7 @@ static void xennet_tx_buf_gc(struct net_device *dev)
 			struct xen_netif_tx_response *txrsp;
 
 			txrsp = RING_GET_RESPONSE(&np->tx, cons);
-			if (txrsp->status == NETIF_RSP_NULL)
+			if (txrsp->status == XEN_NETIF_RSP_NULL)
 				continue;
 
 			id = txrsp->id;
@@ -416,7 +416,7 @@ static void xennet_make_frags(struct sk_buff *skb, struct net_device *dev,
 	   larger than a page), split it into page-sized chunks. */
 	while (len > PAGE_SIZE - offset) {
 		tx->size = PAGE_SIZE - offset;
-		tx->flags |= NETTXF_more_data;
+		tx->flags |= XEN_NETTXF_more_data;
 		len -= tx->size;
 		data += tx->size;
 		offset = 0;
@@ -442,7 +442,7 @@ static void xennet_make_frags(struct sk_buff *skb, struct net_device *dev,
 	for (i = 0; i < frags; i++) {
 		skb_frag_t *frag = skb_shinfo(skb)->frags + i;
 
-		tx->flags |= NETTXF_more_data;
+		tx->flags |= XEN_NETTXF_more_data;
 
 		id = get_id_from_freelist(&np->tx_skb_freelist, np->tx_skbs);
 		np->tx_skbs[id].skb = skb_get(skb);
@@ -517,10 +517,10 @@ static int xennet_start_xmit(struct sk_buff *skb, struct net_device *dev)
 	tx->flags = 0;
 	if (skb->ip_summed == CHECKSUM_PARTIAL)
 		/* local packet? */
-		tx->flags |= NETTXF_csum_blank | NETTXF_data_validated;
+		tx->flags |= XEN_NETTXF_csum_blank | XEN_NETTXF_data_validated;
 	else if (skb->ip_summed == CHECKSUM_UNNECESSARY)
 		/* remote but checksummed. */
-		tx->flags |= NETTXF_data_validated;
+		tx->flags |= XEN_NETTXF_data_validated;
 
 	if (skb_shinfo(skb)->gso_size) {
 		struct xen_netif_extra_info *gso;
@@ -531,7 +531,7 @@ static int xennet_start_xmit(struct sk_buff *skb, struct net_device *dev)
 		if (extra)
 			extra->flags |= XEN_NETIF_EXTRA_FLAG_MORE;
 		else
-			tx->flags |= NETTXF_extra_info;
+			tx->flags |= XEN_NETTXF_extra_info;
 
 		gso->u.gso.size = skb_shinfo(skb)->gso_size;
 		gso->u.gso.type = XEN_NETIF_GSO_TYPE_TCPV4;
@@ -651,7 +651,7 @@ static int xennet_get_responses(struct netfront_info *np,
 	int err = 0;
 	unsigned long ret;
 
-	if (rx->flags & NETRXF_extra_info) {
+	if (rx->flags & XEN_NETRXF_extra_info) {
 		err = xennet_get_extras(np, extras, rp);
 		cons = np->rx.rsp_cons;
 	}
@@ -688,7 +688,7 @@ static int xennet_get_responses(struct netfront_info *np,
 		__skb_queue_tail(list, skb);
 
 next:
-		if (!(rx->flags & NETRXF_more_data))
+		if (!(rx->flags & XEN_NETRXF_more_data))
 			break;
 
 		if (cons + frags == rp) {
@@ -983,9 +983,9 @@ err:
 		skb->truesize += skb->data_len - (RX_COPY_THRESHOLD - len);
 		skb->len += skb->data_len;
 
-		if (rx->flags & NETRXF_csum_blank)
+		if (rx->flags & XEN_NETRXF_csum_blank)
 			skb->ip_summed = CHECKSUM_PARTIAL;
-		else if (rx->flags & NETRXF_data_validated)
+		else if (rx->flags & XEN_NETRXF_data_validated)
 			skb->ip_summed = CHECKSUM_UNNECESSARY;
 
 		__skb_queue_tail(&rxq, skb);
diff --git a/drivers/pci/xen-pcifront.c b/drivers/pci/xen-pcifront.c
index 3a5a6fc..492b7d8 100644
--- a/drivers/pci/xen-pcifront.c
+++ b/drivers/pci/xen-pcifront.c
@@ -243,7 +243,7 @@ struct pci_ops pcifront_bus_ops = {
 
 #ifdef CONFIG_PCI_MSI
 static int pci_frontend_enable_msix(struct pci_dev *dev,
-			int **vector, int nvec)
+			int vector[], int nvec)
 {
 	int err;
 	int i;
@@ -277,18 +277,24 @@ static int pci_frontend_enable_msix(struct pci_dev *dev,
 	if (likely(!err)) {
 		if (likely(!op.value)) {
 			/* we get the result */
-			for (i = 0; i < nvec; i++)
-				*(*vector+i) = op.msix_entries[i].vector;
-			return 0;
+			for (i = 0; i < nvec; i++) {
+				if (op.msix_entries[i].vector <= 0) {
+					dev_warn(&dev->dev, "MSI-X entry %d is invalid: %d!\n",
+						 i, op.msix_entries[i].vector);
+					err = -EINVAL;
+					vector[i] = -1;
+					continue;
+				}
+				vector[i] = op.msix_entries[i].vector;
+			}
 		} else {
 			printk(KERN_DEBUG "enable msix get value %x\n",
 				op.value);
-			return op.value;
 		}
 	} else {
 		dev_err(&dev->dev, "enable msix get err %x\n", err);
-		return err;
 	}
+	return err;
 }
 
 static void pci_frontend_disable_msix(struct pci_dev *dev)
@@ -310,7 +316,7 @@ static void pci_frontend_disable_msix(struct pci_dev *dev)
 		dev_err(&dev->dev, "pci_disable_msix get err %x\n", err);
 }
 
-static int pci_frontend_enable_msi(struct pci_dev *dev, int **vector)
+static int pci_frontend_enable_msi(struct pci_dev *dev, int vector[])
 {
 	int err;
 	struct xen_pci_op op = {
@@ -324,7 +330,13 @@ static int pci_frontend_enable_msi(struct pci_dev *dev, int **vector)
 
 	err = do_pci_op(pdev, &op);
 	if (likely(!err)) {
-		*(*vector) = op.value;
+		vector[0] = op.value;
+		if (op.value <= 0) {
+			dev_warn(&dev->dev, "MSI entry is invalid: %d!\n",
+				 op.value);
+			err = -EINVAL;
+			vector[0] = -1;
+		}
 	} else {
 		dev_err(&dev->dev, "pci frontend enable msi failed for dev "
 			"%x:%x\n", op.bus, op.devfn);
@@ -733,8 +745,7 @@ static void free_pdev(struct pcifront_device *pdev)
 
 	pcifront_free_roots(pdev);
 
-	/*For PCIE_AER error handling job*/
-	flush_scheduled_work();
+	cancel_work_sync(&pdev->op_work);
 
 	if (pdev->irq >= 0)
 		unbind_from_irqhandler(pdev->irq, pdev);
diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig
index 07bec09..e5ecae6 100644
--- a/drivers/xen/Kconfig
+++ b/drivers/xen/Kconfig
@@ -37,6 +37,79 @@ config XEN_BACKEND
 	  Support for backend device drivers that provide I/O services
 	  to other virtual machines.
 
+config XEN_BLKDEV_BACKEND
+	tristate "Block-device backend driver"
+	depends on XEN_BACKEND && BLOCK
+	help
+	  The block-device backend driver allows the kernel to export its
+	  block devices to other guests via a high-performance shared-memory
+	  interface.
+
+config XEN_PCIDEV_BACKEND
+	tristate "PCI-device backend driver"
+	depends on PCI
+	depends on XEN_BACKEND
+	help
+	  The PCI device backend driver allows the kernel to export arbitrary
+	  PCI devices to other guests. If you select this to be a module, you
+	  will need to make sure no other driver has bound to the device(s)
+	  you want to make visible to other guests.
+
+choice
+	prompt "PCI Backend Mode"
+	depends on XEN_PCIDEV_BACKEND
+	default XEN_PCIDEV_BACKEND_VPCI if !IA64
+	default XEN_PCIDEV_BACKEND_CONTROLLER if IA64
+
+config XEN_PCIDEV_BACKEND_VPCI
+	bool "Virtual PCI"
+	---help---
+	  This PCI Backend hides the true PCI topology and makes the frontend
+	  think there is a single PCI bus with only the exported devices on it.
+	  For example, a device at 03:05.0 will be re-assigned to 00:00.0. A
+	  second device at 02:1a.1 will be re-assigned to 00:01.1.
+
+config XEN_PCIDEV_BACKEND_PASS
+	bool "Passthrough"
+	---help---
+	  This PCI Backend provides a real view of the PCI topology to the
+	  frontend (for example, a device at 06:01.b will still appear at
+	  06:01.b to the frontend). This is similar to how Xen 2.0.x exposed
+	  PCI devices to its driver domains. This may be required for drivers
+	  which depend on finding their hardware in certain bus/slot
+	  locations.
+
+config XEN_PCIDEV_BACKEND_SLOT
+	bool "Slot"
+	---help---
+	  This PCI Backend hides the true PCI topology and makes the frontend
+	  think there is a single PCI bus with only the exported devices on it.
+	  Contrary to the virtual PCI backend, a function becomes a new slot.
+	  For example, a device at 03:05.2 will be re-assigned to 00:00.0. A
+	  second device at 02:1a.1 will be re-assigned to 00:01.0.
+
+config XEN_PCIDEV_BACKEND_CONTROLLER
+	bool "Controller"
+	depends on IA64
+	---help---
+	  This PCI backend virtualizes the PCI bus topology by providing a
+	  virtual bus per PCI root device. Devices which are physically under
+	  the same root bus will appear on the same virtual bus. For systems
+	  with complex I/O addressing, this is the only backend which supports
+	  extended I/O port spaces and MMIO translation offsets. This backend
+	  also supports slot virtualization. For example, a device at
+	  0000:01:02.1 will be re-assigned to 0000:00:00.0. A second device
+	  at 0000:02:05.0 (behind a P2P bridge on bus 0000:01) will be
+	  re-assigned to 0000:00:01.0. A third device at 0000:16:05.0 (under
+	  a different PCI root bus) will be re-assigned to 0000:01:00.0.
+
+endchoice
+
+config XEN_PCIDEV_BE_DEBUG
+	bool "PCI Backend Debugging"
+	depends on XEN_PCIDEV_BACKEND
+
+
 config XENFS
 	tristate "Xen filesystem"
 	default y
@@ -76,10 +149,20 @@ config XEN_XENBUS_FRONTEND
 config XEN_GNTDEV
 	tristate "userspace grant access device driver"
 	depends on XEN
+	default m
 	select MMU_NOTIFIER
 	help
 	  Allows userspace processes to use grants.
 
+config XEN_GRANT_DEV_ALLOC
+	tristate "User-space grant reference allocator driver"
+	depends on XEN
+	default m
+	help
+	  Allows userspace processes to create pages with access granted
+	  to other domains. This can be used to implement frontend drivers
+	  or as part of an inter-domain shared memory channel.
+
 config XEN_PLATFORM_PCI
 	tristate "xen platform pci device driver"
 	depends on XEN_PVHVM && PCI
diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile
index 5088cc2..c1cb873 100644
--- a/drivers/xen/Makefile
+++ b/drivers/xen/Makefile
@@ -9,7 +9,10 @@ obj-$(CONFIG_HOTPLUG_CPU) += cpu_hotplug.o
 obj-$(CONFIG_XEN_XENCOMM) += xencomm.o
 obj-$(CONFIG_XEN_BALLOON) += balloon.o
 obj-$(CONFIG_XEN_DEV_EVTCHN) += xen-evtchn.o
+obj-$(CONFIG_XEN_PCIDEV_BACKEND) += pciback/
+obj-$(CONFIG_XEN_BLKDEV_BACKEND) += blkback/
 obj-$(CONFIG_XEN_GNTDEV) += xen-gntdev.o
+obj-$(CONFIG_XEN_GRANT_DEV_ALLOC) += xen-gntalloc.o
 obj-$(CONFIG_XENFS) += xenfs/
 obj-$(CONFIG_XEN_SYS_HYPERVISOR) += sys-hypervisor.o
 obj-$(CONFIG_XEN_PLATFORM_PCI) += xen-platform-pci.o
@@ -18,5 +21,6 @@ obj-$(CONFIG_XEN_DOM0) += pci.o
 
 xen-evtchn-y := evtchn.o
 xen-gntdev-y := gntdev.o
+xen-gntalloc-y := gntalloc.o
 
 xen-platform-pci-y := platform-pci.o
diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c
index 43f9f02..718050a 100644
--- a/drivers/xen/balloon.c
+++ b/drivers/xen/balloon.c
@@ -232,7 +232,7 @@ static int increase_reservation(unsigned long nr_pages)
 		set_phys_to_machine(pfn, frame_list[i]);
 
 		/* Link back into the page tables if not highmem. */
-		if (pfn < max_low_pfn) {
+		if (!xen_hvm_domain() && pfn < max_low_pfn) {
 			int ret;
 			ret = HYPERVISOR_update_va_mapping(
 				(unsigned long)__va(pfn << PAGE_SHIFT),
@@ -280,7 +280,7 @@ static int decrease_reservation(unsigned long nr_pages)
 
 		scrub_page(page);
 
-		if (!PageHighMem(page)) {
+		if (!xen_hvm_domain() && !PageHighMem(page)) {
 			ret = HYPERVISOR_update_va_mapping(
 				(unsigned long)__va(pfn << PAGE_SHIFT),
 				__pte_ma(0), 0);
@@ -296,7 +296,7 @@ static int decrease_reservation(unsigned long nr_pages)
 	/* No more mappings: invalidate P2M and add to balloon. */
 	for (i = 0; i < nr_pages; i++) {
 		pfn = mfn_to_pfn(frame_list[i]);
-		set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
+		__set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
 		balloon_append(pfn_to_page(pfn));
 	}
 
@@ -392,15 +392,19 @@ static struct notifier_block xenstore_notifier;
 
 static int __init balloon_init(void)
 {
-	unsigned long pfn, extra_pfn_end;
+	unsigned long pfn, nr_pages, extra_pfn_end;
 	struct page *page;
 
-	if (!xen_pv_domain())
+	if (!xen_domain())
 		return -ENODEV;
 
 	pr_info("xen_balloon: Initialising balloon driver.\n");
 
-	balloon_stats.current_pages = min(xen_start_info->nr_pages, max_pfn);
+	if (xen_pv_domain())
+		nr_pages = xen_start_info->nr_pages;
+	else
+		nr_pages = max_pfn;
+	balloon_stats.current_pages = min(nr_pages, max_pfn);
 	balloon_stats.target_pages = balloon_stats.current_pages;
 	balloon_stats.balloon_low = 0;
 	balloon_stats.balloon_high = 0;
diff --git a/drivers/xen/blkback/Makefile b/drivers/xen/blkback/Makefile
new file mode 100644
index 0000000..f1ae1ff
--- /dev/null
+++ b/drivers/xen/blkback/Makefile
@@ -0,0 +1,3 @@
+obj-$(CONFIG_XEN_BLKDEV_BACKEND) := xen-blkback.o
+
+xen-blkback-y := blkback.o xenbus.o interface.o vbd.o
diff --git a/drivers/xen/blkback/blkback.c b/drivers/xen/blkback/blkback.c
new file mode 100644
index 0000000..15790ae
--- /dev/null
+++ b/drivers/xen/blkback/blkback.c
@@ -0,0 +1,708 @@
+/******************************************************************************
+ * arch/xen/drivers/blkif/backend/main.c
+ *
+ * Back-end of the driver for virtual block devices. This portion of the
+ * driver exports a 'unified' block-device interface that can be accessed
+ * by any operating system that implements a compatible front end. A
+ * reference front-end implementation can be found in:
+ *  arch/xen/drivers/blkif/frontend
+ *
+ * Copyright (c) 2003-2004, Keir Fraser & Steve Hand
+ * Copyright (c) 2005, Christopher Clark
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation; or, when distributed
+ * separately from the Linux kernel or incorporated into other
+ * software packages, subject to the following license:
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <linux/spinlock.h>
+#include <linux/kthread.h>
+#include <linux/list.h>
+#include <linux/delay.h>
+#include <linux/freezer.h>
+
+#include <xen/events.h>
+#include <xen/page.h>
+#include <asm/xen/hypervisor.h>
+#include <asm/xen/hypercall.h>
+#include "common.h"
+
+/*
+ * These are rather arbitrary. They are fairly large because adjacent requests
+ * pulled from a communication ring are quite likely to end up being part of
+ * the same scatter/gather request at the disc.
+ *
+ * ** TRY INCREASING 'blkif_reqs' IF WRITE SPEEDS SEEM TOO LOW **
+ *
+ * This will increase the chances of being able to write whole tracks.
+ * 64 should be enough to keep us competitive with Linux.
+ */
+static int blkif_reqs = 64;
+module_param_named(reqs, blkif_reqs, int, 0);
+MODULE_PARM_DESC(reqs, "Number of blkback requests to allocate");
+
+/* Run-time switchable: /sys/module/blkback/parameters/ */
+static unsigned int log_stats = 0;
+static unsigned int debug_lvl = 0;
+module_param(log_stats, int, 0644);
+module_param(debug_lvl, int, 0644);
+
+/*
+ * Each outstanding request that we've passed to the lower device layers has a
+ * 'pending_req' allocated to it. Each buffer_head that completes decrements
+ * the pendcnt towards zero. When it hits zero, the specified domain has a
+ * response queued for it, with the saved 'id' passed back.
+ */
+typedef struct {
+	blkif_t *blkif;
+	u64 id;
+	int nr_pages;
+	atomic_t pendcnt;
+	unsigned short operation;
+	int status;
+	struct list_head free_list;
+} pending_req_t;
+
|
|
+#define BLKBACK_INVALID_HANDLE (~0)
|
|
+
|
|
+struct xen_blkbk {
|
|
+ pending_req_t *pending_reqs;
|
|
+ struct list_head pending_free;
|
|
+ spinlock_t pending_free_lock;
|
|
+ wait_queue_head_t pending_free_wq;
|
|
+ struct page **pending_pages;
|
|
+ grant_handle_t *pending_grant_handles;
|
|
+};
|
|
+
|
|
+static struct xen_blkbk *blkbk;
|
|
+
|
|
+static inline int vaddr_pagenr(pending_req_t *req, int seg)
|
|
+{
|
|
+ return (req - blkbk->pending_reqs) * BLKIF_MAX_SEGMENTS_PER_REQUEST + seg;
|
|
+}
|
|
+
|
|
+#define pending_page(req, seg) pending_pages[vaddr_pagenr(req, seg)]
|
|
+
|
|
+static inline unsigned long vaddr(pending_req_t *req, int seg)
|
|
+{
|
|
+ unsigned long pfn = page_to_pfn(blkbk->pending_page(req, seg));
|
|
+ return (unsigned long)pfn_to_kaddr(pfn);
|
|
+}
|
|
+
|
|
+#define pending_handle(_req, _seg) \
|
|
+ (blkbk->pending_grant_handles[vaddr_pagenr(_req, _seg)])
|
|
+
|
|
+
|
|
+static int do_block_io_op(blkif_t *blkif);
|
|
+static void dispatch_rw_block_io(blkif_t *blkif,
|
|
+ struct blkif_request *req,
|
|
+ pending_req_t *pending_req);
|
|
+static void make_response(blkif_t *blkif, u64 id,
|
|
+ unsigned short op, int st);
|
|
+
|
|
+/******************************************************************
|
|
+ * misc small helpers
|
|
+ */
|
|
+static pending_req_t* alloc_req(void)
|
|
+{
|
|
+ pending_req_t *req = NULL;
|
|
+ unsigned long flags;
|
|
+
|
|
+ spin_lock_irqsave(&blkbk->pending_free_lock, flags);
|
|
+ if (!list_empty(&blkbk->pending_free)) {
|
|
+ req = list_entry(blkbk->pending_free.next, pending_req_t, free_list);
|
|
+ list_del(&req->free_list);
|
|
+ }
|
|
+ spin_unlock_irqrestore(&blkbk->pending_free_lock, flags);
|
|
+ return req;
|
|
+}
|
|
+
|
|
+static void free_req(pending_req_t *req)
|
|
+{
|
|
+ unsigned long flags;
|
|
+ int was_empty;
|
|
+
|
|
+ spin_lock_irqsave(&blkbk->pending_free_lock, flags);
|
|
+ was_empty = list_empty(&blkbk->pending_free);
|
|
+ list_add(&req->free_list, &blkbk->pending_free);
|
|
+ spin_unlock_irqrestore(&blkbk->pending_free_lock, flags);
|
|
+ if (was_empty)
|
|
+ wake_up(&blkbk->pending_free_wq);
|
|
+}
|
|
+
|
|
+static void unplug_queue(blkif_t *blkif)
|
|
+{
|
|
+ if (blkif->plug == NULL)
|
|
+ return;
|
|
+ if (blkif->plug->unplug_fn)
|
|
+ blkif->plug->unplug_fn(blkif->plug);
|
|
+ blk_put_queue(blkif->plug);
|
|
+ blkif->plug = NULL;
|
|
+}
|
|
+
|
|
+static void plug_queue(blkif_t *blkif, struct block_device *bdev)
|
|
+{
|
|
+ struct request_queue *q = bdev_get_queue(bdev);
|
|
+
|
|
+ if (q == blkif->plug)
|
|
+ return;
|
|
+ unplug_queue(blkif);
|
|
+ blk_get_queue(q);
|
|
+ blkif->plug = q;
|
|
+}
|
|
+
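+/*
+ * Unmap the grant mappings of a completed request and invalidate the
+ * cached handles so the pending pages can be reused by later requests.
+ */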
+static void fast_flush_area(pending_req_t *req)
+{
+ struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+ unsigned int i, invcount = 0;
+ grant_handle_t handle;
+ int ret;
+
+ for (i = 0; i < req->nr_pages; i++) {
+ handle = pending_handle(req, i);
+ if (handle == BLKBACK_INVALID_HANDLE)
+ continue;
+ gnttab_set_unmap_op(&unmap[invcount], vaddr(req, i),
+ GNTMAP_host_map, handle);
+ pending_handle(req, i) = BLKBACK_INVALID_HANDLE;
+ invcount++;
+ }
+
+ ret = HYPERVISOR_grant_table_op(
+ GNTTABOP_unmap_grant_ref, unmap, invcount);
+ BUG_ON(ret);
+ /* Note, we use invcount, not nr->pages, so we can't index
+ * using vaddr(req, i). */
+ for (i = 0; i < invcount; i++) {
+ ret = m2p_remove_override(
+ virt_to_page(unmap[i].host_addr), false);
+ if (ret) {
+ printk(KERN_ALERT "Failed to remove M2P override for " \
+ "%lx\n", (unsigned long)unmap[i].host_addr);
+ continue;
+ }
+ }
+}
+
+/******************************************************************
+ * SCHEDULER FUNCTIONS
+ */
+
+static void print_stats(blkif_t *blkif)
+{
+ printk(KERN_DEBUG "%s: oo %3d | rd %4d | wr %4d | br %4d\n",
+ current->comm, blkif->st_oo_req,
+ blkif->st_rd_req, blkif->st_wr_req, blkif->st_br_req);
+ blkif->st_print = jiffies + msecs_to_jiffies(10 * 1000);
+ blkif->st_rd_req = 0;
+ blkif->st_wr_req = 0;
+ blkif->st_oo_req = 0;
+}
+
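+/*
+ * Per-VBD kernel thread: sleep until the frontend signals new requests
+ * (and a free pending_req is available), then drain the ring.
+ */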
+int blkif_schedule(void *arg)
+{
+ blkif_t *blkif = arg;
+ struct vbd *vbd = &blkif->vbd;
+
+ blkif_get(blkif);
+
+ if (debug_lvl)
+ printk(KERN_DEBUG "%s: started\n", current->comm);
+
+ while (!kthread_should_stop()) {
+ if (try_to_freeze())
+ continue;
+ if (unlikely(vbd->size != vbd_size(vbd)))
+ vbd_resize(blkif);
+
+ wait_event_interruptible(
+ blkif->wq,
+ blkif->waiting_reqs || kthread_should_stop());
+ wait_event_interruptible(
+ blkbk->pending_free_wq,
+ !list_empty(&blkbk->pending_free) || kthread_should_stop());
+
+ blkif->waiting_reqs = 0;
+ smp_mb(); /* clear flag *before* checking for work */
+
+ if (do_block_io_op(blkif))
+ blkif->waiting_reqs = 1;
+ unplug_queue(blkif);
+
+ if (log_stats && time_after(jiffies, blkif->st_print))
+ print_stats(blkif);
+ }
+
+ if (log_stats)
+ print_stats(blkif);
+ if (debug_lvl)
+ printk(KERN_DEBUG "%s: exiting\n", current->comm);
+
+ blkif->xenblkd = NULL;
+ blkif_put(blkif);
+
+ return 0;
+}
+
+/******************************************************************
+ * COMPLETION CALLBACK -- Called as bh->b_end_io()
+ */
+
+static void __end_block_io_op(pending_req_t *pending_req, int error)
+{
+ /* An error fails the entire request. */
+ if ((pending_req->operation == BLKIF_OP_WRITE_BARRIER) &&
+ (error == -EOPNOTSUPP)) {
+ DPRINTK("blkback: write barrier op failed, not supported\n");
+ blkback_barrier(XBT_NIL, pending_req->blkif->be, 0);
+ pending_req->status = BLKIF_RSP_EOPNOTSUPP;
+ } else if (error) {
+ DPRINTK("Buffer not up-to-date at end of operation, "
+ "error=%d\n", error);
+ pending_req->status = BLKIF_RSP_ERROR;
+ }
+
+ if (atomic_dec_and_test(&pending_req->pendcnt)) {
+ fast_flush_area(pending_req);
+ make_response(pending_req->blkif, pending_req->id,
+ pending_req->operation, pending_req->status);
+ blkif_put(pending_req->blkif);
+ free_req(pending_req);
+ }
+}
+
+static void end_block_io_op(struct bio *bio, int error)
+{
+ __end_block_io_op(bio->bi_private, error);
+ bio_put(bio);
+}
+
+
+/******************************************************************************
+ * NOTIFICATION FROM GUEST OS.
+ */
+
+static void blkif_notify_work(blkif_t *blkif)
+{
+ blkif->waiting_reqs = 1;
+ wake_up(&blkif->wq);
+}
+
+irqreturn_t blkif_be_int(int irq, void *dev_id)
+{
+ blkif_notify_work(dev_id);
+ return IRQ_HANDLED;
+}
+
+
+
+/******************************************************************
+ * DOWNWARD CALLS -- These interface with the block-device layer proper.
+ */
+
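+/*
+ * Consume requests from the shared ring until it is empty or this
+ * thread is asked to stop; returns non-zero if work remains.
+ */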
+static int do_block_io_op(blkif_t *blkif)
+{
+ union blkif_back_rings *blk_rings = &blkif->blk_rings;
+ struct blkif_request req;
+ pending_req_t *pending_req;
+ RING_IDX rc, rp;
+ int more_to_do = 0;
+
+ rc = blk_rings->common.req_cons;
+ rp = blk_rings->common.sring->req_prod;
+ rmb(); /* Ensure we see queued requests up to 'rp'. */
+
+ while (rc != rp) {
+
+ if (RING_REQUEST_CONS_OVERFLOW(&blk_rings->common, rc))
+ break;
+
+ if (kthread_should_stop()) {
+ more_to_do = 1;
+ break;
+ }
+
+ pending_req = alloc_req();
+ if (NULL == pending_req) {
+ blkif->st_oo_req++;
+ more_to_do = 1;
+ break;
+ }
+
+ switch (blkif->blk_protocol) {
+ case BLKIF_PROTOCOL_NATIVE:
+ memcpy(&req, RING_GET_REQUEST(&blk_rings->native, rc), sizeof(req));
+ break;
+ case BLKIF_PROTOCOL_X86_32:
+ blkif_get_x86_32_req(&req, RING_GET_REQUEST(&blk_rings->x86_32, rc));
+ break;
+ case BLKIF_PROTOCOL_X86_64:
+ blkif_get_x86_64_req(&req, RING_GET_REQUEST(&blk_rings->x86_64, rc));
+ break;
+ default:
+ BUG();
+ }
+ blk_rings->common.req_cons = ++rc; /* before make_response() */
+
+ /* Apply all sanity checks to /private copy/ of request. */
+ barrier();
+
+ switch (req.operation) {
+ case BLKIF_OP_READ:
+ blkif->st_rd_req++;
+ dispatch_rw_block_io(blkif, &req, pending_req);
+ break;
+ case BLKIF_OP_WRITE_BARRIER:
+ blkif->st_br_req++;
+ /* fall through */
+ case BLKIF_OP_WRITE:
+ blkif->st_wr_req++;
+ dispatch_rw_block_io(blkif, &req, pending_req);
+ break;
+ default:
+ /* A good sign something is wrong: sleep for a while to
+ * avoid excessive CPU consumption by a bad guest. */
+ msleep(1);
+ DPRINTK("error: unknown block io operation [%d]\n",
+ req.operation);
+ make_response(blkif, req.id, req.operation,
+ BLKIF_RSP_ERROR);
+ free_req(pending_req);
+ break;
+ }
+
+ /* Yield point for this unbounded loop. */
+ cond_resched();
+ }
+
+ return more_to_do;
+}
+
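+/*
+ * Turn one blkif request into bios: map the granted frames, validate
+ * the segments against the VBD, and submit the I/O to the real device.
+ */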
+static void dispatch_rw_block_io(blkif_t *blkif,
+ struct blkif_request *req,
+ pending_req_t *pending_req)
+{
+ struct gnttab_map_grant_ref map[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+ struct phys_req preq;
+ struct {
+ unsigned long buf; unsigned int nsec;
+ } seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+ unsigned int nseg;
+ struct bio *bio = NULL;
+ int ret, i;
+ int operation;
+
+ switch (req->operation) {
+ case BLKIF_OP_READ:
+ operation = READ;
+ break;
+ case BLKIF_OP_WRITE:
+ operation = WRITE;
+ break;
+ case BLKIF_OP_WRITE_BARRIER:
+ operation = REQ_FLUSH | REQ_FUA;
+ break;
+ default:
+ operation = 0; /* make gcc happy */
+ BUG();
+ }
+
+ /* Check that number of segments is sane. */
+ nseg = req->nr_segments;
+ if (unlikely(nseg == 0 && operation != (REQ_FLUSH | REQ_FUA)) ||
+ unlikely(nseg > BLKIF_MAX_SEGMENTS_PER_REQUEST)) {
+ DPRINTK("Bad number of segments in request (%d)\n", nseg);
+ goto fail_response;
+ }
+
+ preq.dev = req->handle;
+ preq.sector_number = req->u.rw.sector_number;
+ preq.nr_sects = 0;
+
+ pending_req->blkif = blkif;
+ pending_req->id = req->id;
+ pending_req->operation = req->operation;
+ pending_req->status = BLKIF_RSP_OKAY;
+ pending_req->nr_pages = nseg;
+
+ for (i = 0; i < nseg; i++) {
+ uint32_t flags;
+
+ seg[i].nsec = req->u.rw.seg[i].last_sect -
+ req->u.rw.seg[i].first_sect + 1;
+
+ if ((req->u.rw.seg[i].last_sect >= (PAGE_SIZE >> 9)) ||
+ (req->u.rw.seg[i].last_sect < req->u.rw.seg[i].first_sect))
+ goto fail_response;
+ preq.nr_sects += seg[i].nsec;
+
+ flags = GNTMAP_host_map;
+ if (operation != READ)
+ flags |= GNTMAP_readonly;
+ gnttab_set_map_op(&map[i], vaddr(pending_req, i), flags,
+ req->u.rw.seg[i].gref, blkif->domid);
+ }
+
+ ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, map, nseg);
+ BUG_ON(ret);
+
+ for (i = 0; i < nseg; i++) {
+ if (unlikely(map[i].status != 0)) {
+ DPRINTK("invalid buffer -- could not remap it\n");
+ map[i].handle = BLKBACK_INVALID_HANDLE;
+ ret |= 1;
+ }
+
+ pending_handle(pending_req, i) = map[i].handle;
+
+ if (ret)
+ continue;
+
+ ret = m2p_add_override(PFN_DOWN(map[i].dev_bus_addr),
+ blkbk->pending_page(pending_req, i), false);
+ if (ret) {
+ printk(KERN_ALERT "Failed to install M2P override for"\
+ " %lx (ret: %d)\n", (unsigned long)map[i].dev_bus_addr, ret);
+ continue;
+ }
+
+ seg[i].buf = map[i].dev_bus_addr |
+ (req->u.rw.seg[i].first_sect << 9);
+ }
+
+ if (ret)
+ goto fail_flush;
+
+ if (vbd_translate(&preq, blkif, operation) != 0) {
+ DPRINTK("access denied: %s of [%llu,%llu] on dev=%04x\n",
+ operation == READ ? "read" : "write",
+ preq.sector_number,
+ preq.sector_number + preq.nr_sects, preq.dev);
+ goto fail_flush;
+ }
+
+ plug_queue(blkif, preq.bdev);
+ atomic_set(&pending_req->pendcnt, 1);
+ blkif_get(blkif);
+
+ for (i = 0; i < nseg; i++) {
+ if (((int)preq.sector_number|(int)seg[i].nsec) &
+ ((bdev_logical_block_size(preq.bdev) >> 9) - 1)) {
+ DPRINTK("Misaligned I/O request from domain %d",
+ blkif->domid);
+ goto fail_put_bio;
+ }
+
+ while ((bio == NULL) ||
+ (bio_add_page(bio,
+ blkbk->pending_page(pending_req, i),
+ seg[i].nsec << 9,
+ seg[i].buf & ~PAGE_MASK) == 0)) {
+ if (bio) {
+ atomic_inc(&pending_req->pendcnt);
+ submit_bio(operation, bio);
+ }
+
+ bio = bio_alloc(GFP_KERNEL, nseg-i);
+ if (unlikely(bio == NULL))
+ goto fail_put_bio;
+
+ bio->bi_bdev = preq.bdev;
+ bio->bi_private = pending_req;
+ bio->bi_end_io = end_block_io_op;
+ bio->bi_sector = preq.sector_number;
+ }
+
+ preq.sector_number += seg[i].nsec;
+ }
+
+ if (!bio) {
+ BUG_ON(operation != (REQ_FLUSH | REQ_FUA));
+ bio = bio_alloc(GFP_KERNEL, 0);
+ if (unlikely(bio == NULL))
+ goto fail_put_bio;
+
+ bio->bi_bdev = preq.bdev;
+ bio->bi_private = pending_req;
+ bio->bi_end_io = end_block_io_op;
+ bio->bi_sector = -1;
+ }
+
+ submit_bio(operation, bio);
+
+ if (operation == READ)
+ blkif->st_rd_sect += preq.nr_sects;
+ else if (operation == WRITE || operation == (REQ_FLUSH | REQ_FUA))
+ blkif->st_wr_sect += preq.nr_sects;
+
+ return;
+
+ fail_flush:
+ fast_flush_area(pending_req);
+ fail_response:
+ make_response(blkif, req->id, req->operation, BLKIF_RSP_ERROR);
+ free_req(pending_req);
+ msleep(1); /* back off a bit */
+ return;
+
+ fail_put_bio:
+ __end_block_io_op(pending_req, -EINVAL);
+ if (bio)
+ bio_put(bio);
+ unplug_queue(blkif);
+ msleep(1); /* back off a bit */
+ return;
+}
+
+
+
+/******************************************************************
+ * MISCELLANEOUS SETUP / TEARDOWN / DEBUGGING
+ */
+
+
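+/*
+ * Put a response on the ring for the given request and notify the
+ * frontend via its event channel if it asked for a notification.
+ */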
+static void make_response(blkif_t *blkif, u64 id,
+ unsigned short op, int st)
+{
+ struct blkif_response resp;
+ unsigned long flags;
+ union blkif_back_rings *blk_rings = &blkif->blk_rings;
+ int more_to_do = 0;
+ int notify;
+
+ resp.id = id;
+ resp.operation = op;
+ resp.status = st;
+
+ spin_lock_irqsave(&blkif->blk_ring_lock, flags);
+ /* Place on the response ring for the relevant domain. */
+ switch (blkif->blk_protocol) {
+ case BLKIF_PROTOCOL_NATIVE:
+ memcpy(RING_GET_RESPONSE(&blk_rings->native, blk_rings->native.rsp_prod_pvt),
+ &resp, sizeof(resp));
+ break;
+ case BLKIF_PROTOCOL_X86_32:
+ memcpy(RING_GET_RESPONSE(&blk_rings->x86_32, blk_rings->x86_32.rsp_prod_pvt),
+ &resp, sizeof(resp));
+ break;
+ case BLKIF_PROTOCOL_X86_64:
+ memcpy(RING_GET_RESPONSE(&blk_rings->x86_64, blk_rings->x86_64.rsp_prod_pvt),
+ &resp, sizeof(resp));
+ break;
+ default:
+ BUG();
+ }
+ blk_rings->common.rsp_prod_pvt++;
+ RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&blk_rings->common, notify);
+ if (blk_rings->common.rsp_prod_pvt == blk_rings->common.req_cons) {
+ /*
+ * Tail check for pending requests. Allows frontend to avoid
+ * notifications if requests are already in flight (lower
+ * overheads and promotes batching).
+ */
+ RING_FINAL_CHECK_FOR_REQUESTS(&blk_rings->common, more_to_do);
+
+ } else if (RING_HAS_UNCONSUMED_REQUESTS(&blk_rings->common)) {
+ more_to_do = 1;
+ }
+
+ spin_unlock_irqrestore(&blkif->blk_ring_lock, flags);
+
+ if (more_to_do)
+ blkif_notify_work(blkif);
+ if (notify)
+ notify_remote_via_irq(blkif->irq);
+}
+
+static int __init blkif_init(void)
+{
+ int i, mmap_pages;
+ int rc = 0;
+
+ if (!xen_pv_domain())
+ return -ENODEV;
+
+ blkbk = (struct xen_blkbk *)vmalloc(sizeof(struct xen_blkbk));
+ if (!blkbk) {
+ printk(KERN_ALERT "%s: out of memory!\n", __func__);
+ return -ENOMEM;
+ }
+
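+ /* One page is needed for each segment of each in-flight request. */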
+ mmap_pages = blkif_reqs * BLKIF_MAX_SEGMENTS_PER_REQUEST;
+
+ blkbk->pending_reqs = kmalloc(sizeof(blkbk->pending_reqs[0]) *
+ blkif_reqs, GFP_KERNEL);
+ blkbk->pending_grant_handles = vzalloc(sizeof(blkbk->pending_grant_handles[0]) *
+ mmap_pages);
+ blkbk->pending_pages = vzalloc(sizeof(blkbk->pending_pages[0]) * mmap_pages);
+
+ if (!blkbk->pending_reqs || !blkbk->pending_grant_handles || !blkbk->pending_pages) {
+ rc = -ENOMEM;
+ goto out_of_memory;
+ }
+
+ for (i = 0; i < mmap_pages; i++) {
+ blkbk->pending_grant_handles[i] = BLKBACK_INVALID_HANDLE;
+ blkbk->pending_pages[i] = alloc_page(GFP_KERNEL | __GFP_HIGHMEM);
+ if (blkbk->pending_pages[i] == NULL) {
+ rc = -ENOMEM;
+ goto out_of_memory;
+ }
+ }
+ rc = blkif_interface_init();
+ if (rc)
+ goto failed_init;
+
+ memset(blkbk->pending_reqs, 0, blkif_reqs * sizeof(blkbk->pending_reqs[0]));
+
+ INIT_LIST_HEAD(&blkbk->pending_free);
+ spin_lock_init(&blkbk->pending_free_lock);
+ init_waitqueue_head(&blkbk->pending_free_wq);
+
+ for (i = 0; i < blkif_reqs; i++)
+ list_add_tail(&blkbk->pending_reqs[i].free_list, &blkbk->pending_free);
+
+ rc = blkif_xenbus_init();
+ if (rc)
+ goto failed_init;
+
+ return 0;
+
+ out_of_memory:
+ printk(KERN_ERR "%s: out of memory\n", __func__);
+ failed_init:
+ kfree(blkbk->pending_reqs);
+ vfree(blkbk->pending_grant_handles);
+ for (i = 0; i < mmap_pages; i++) {
+ if (blkbk->pending_pages[i])
+ __free_page(blkbk->pending_pages[i]);
+ }
+ vfree(blkbk->pending_pages);
+ vfree(blkbk);
+ blkbk = NULL;
+ return rc;
+}
+
+module_init(blkif_init);
+
+MODULE_LICENSE("Dual BSD/GPL");
diff --git a/drivers/xen/blkback/common.h b/drivers/xen/blkback/common.h
new file mode 100644
index 0000000..0f91830
--- /dev/null
+++ b/drivers/xen/blkback/common.h
@@ -0,0 +1,141 @@
+/*
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation; or, when distributed
+ * separately from the Linux kernel or incorporated into other
+ * software packages, subject to the following license:
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef __BLKIF__BACKEND__COMMON_H__
+#define __BLKIF__BACKEND__COMMON_H__
+
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/interrupt.h>
+#include <linux/slab.h>
+#include <linux/blkdev.h>
+#include <linux/vmalloc.h>
+#include <linux/wait.h>
+#include <asm/io.h>
+#include <asm/setup.h>
+#include <asm/pgalloc.h>
+#include <asm/hypervisor.h>
+#include <xen/blkif.h>
+#include <xen/grant_table.h>
+#include <xen/xenbus.h>
+
+#define DPRINTK(_f, _a...) \
+ pr_debug("(file=%s, line=%d) " _f, \
+ __FILE__ , __LINE__ , ## _a )
+
+struct vbd {
+ blkif_vdev_t handle; /* what the domain refers to this vbd as */
+ unsigned char readonly; /* Non-zero -> read-only */
+ unsigned char type; /* VDISK_xxx */
+ u32 pdevice; /* phys device that this vbd maps to */
+ struct block_device *bdev;
+ sector_t size; /* Cached size parameter */
+};
+
+struct backend_info;
+
+typedef struct blkif_st {
+ /* Unique identifier for this interface. */
+ domid_t domid;
+ unsigned int handle;
+ /* Physical parameters of the comms window. */
+ unsigned int irq;
+ /* Comms information. */
+ enum blkif_protocol blk_protocol;
+ union blkif_back_rings blk_rings;
+ struct vm_struct *blk_ring_area;
+ /* The VBD attached to this interface. */
+ struct vbd vbd;
+ /* Back pointer to the backend_info. */
+ struct backend_info *be;
+ /* Private fields. */
+ spinlock_t blk_ring_lock;
+ atomic_t refcnt;
+
+ wait_queue_head_t wq;
+ struct task_struct *xenblkd;
+ unsigned int waiting_reqs;
+ struct request_queue *plug;
+
+ /* statistics */
+ unsigned long st_print;
+ int st_rd_req;
+ int st_wr_req;
+ int st_oo_req;
+ int st_br_req;
+ int st_rd_sect;
+ int st_wr_sect;
+
+ wait_queue_head_t waiting_to_free;
+
+ grant_handle_t shmem_handle;
+ grant_ref_t shmem_ref;
+} blkif_t;
+
+blkif_t *blkif_alloc(domid_t domid);
+void blkif_disconnect(blkif_t *blkif);
+void blkif_free(blkif_t *blkif);
+int blkif_map(blkif_t *blkif, unsigned long shared_page, unsigned int evtchn);
+void vbd_resize(blkif_t *blkif);
+
+#define blkif_get(_b) (atomic_inc(&(_b)->refcnt))
+#define blkif_put(_b) \
+ do { \
+ if (atomic_dec_and_test(&(_b)->refcnt)) \
+ wake_up(&(_b)->waiting_to_free);\
+ } while (0)
+
+/* Create a vbd. */
+int vbd_create(blkif_t *blkif, blkif_vdev_t vdevice, unsigned major,
+ unsigned minor, int readonly, int cdrom);
+void vbd_free(struct vbd *vbd);
+
+unsigned long long vbd_size(struct vbd *vbd);
+unsigned int vbd_info(struct vbd *vbd);
+unsigned long vbd_secsize(struct vbd *vbd);
+
+struct phys_req {
+ unsigned short dev;
+ unsigned short nr_sects;
+ struct block_device *bdev;
+ blkif_sector_t sector_number;
+};
+
+int vbd_translate(struct phys_req *req, blkif_t *blkif, int operation);
+
+int blkif_interface_init(void);
+
+int blkif_xenbus_init(void);
+
+irqreturn_t blkif_be_int(int irq, void *dev_id);
+int blkif_schedule(void *arg);
+
+int blkback_barrier(struct xenbus_transaction xbt,
+ struct backend_info *be, int state);
+
+struct xenbus_device *blkback_xenbus(struct backend_info *be);
+
+#endif /* __BLKIF__BACKEND__COMMON_H__ */
diff --git a/drivers/xen/blkback/interface.c b/drivers/xen/blkback/interface.c
new file mode 100644
index 0000000..e397a41
--- /dev/null
+++ b/drivers/xen/blkback/interface.c
@@ -0,0 +1,186 @@
+/******************************************************************************
+ * arch/xen/drivers/blkif/backend/interface.c
+ *
+ * Block-device interface management.
+ *
+ * Copyright (c) 2004, Keir Fraser
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation; or, when distributed
+ * separately from the Linux kernel or incorporated into other
+ * software packages, subject to the following license:
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "common.h"
+#include <xen/events.h>
+#include <xen/grant_table.h>
+#include <linux/kthread.h>
+
+static struct kmem_cache *blkif_cachep;
+
+blkif_t *blkif_alloc(domid_t domid)
+{
+ blkif_t *blkif;
+
+ blkif = kmem_cache_alloc(blkif_cachep, GFP_KERNEL);
+ if (!blkif)
+ return ERR_PTR(-ENOMEM);
+
+ memset(blkif, 0, sizeof(*blkif));
+ blkif->domid = domid;
+ spin_lock_init(&blkif->blk_ring_lock);
+ atomic_set(&blkif->refcnt, 1);
+ init_waitqueue_head(&blkif->wq);
+ blkif->st_print = jiffies;
+ init_waitqueue_head(&blkif->waiting_to_free);
+
+ return blkif;
+}
+
+static int map_frontend_page(blkif_t *blkif, unsigned long shared_page)
+{
+ struct gnttab_map_grant_ref op;
+
+ gnttab_set_map_op(&op, (unsigned long)blkif->blk_ring_area->addr,
+ GNTMAP_host_map, shared_page, blkif->domid);
+
+ if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1))
+ BUG();
+
+ if (op.status) {
+ DPRINTK(" Grant table operation failure !\n");
+ return op.status;
+ }
+
+ blkif->shmem_ref = shared_page;
+ blkif->shmem_handle = op.handle;
+
+ return 0;
+}
+
+static void unmap_frontend_page(blkif_t *blkif)
+{
+ struct gnttab_unmap_grant_ref op;
+
+ gnttab_set_unmap_op(&op, (unsigned long)blkif->blk_ring_area->addr,
+ GNTMAP_host_map, blkif->shmem_handle);
+
+ if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1))
+ BUG();
+}
+
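+/*
+ * Map the frontend's shared ring page and bind the interdomain event
+ * channel it uses to signal requests.
+ */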
+int blkif_map(blkif_t *blkif, unsigned long shared_page, unsigned int evtchn)
+{
+ int err;
+
+ /* Already connected through? */
+ if (blkif->irq)
+ return 0;
+
+ if ( (blkif->blk_ring_area = alloc_vm_area(PAGE_SIZE)) == NULL )
+ return -ENOMEM;
+
+ err = map_frontend_page(blkif, shared_page);
+ if (err) {
+ free_vm_area(blkif->blk_ring_area);
+ return err;
+ }
+
+ switch (blkif->blk_protocol) {
+ case BLKIF_PROTOCOL_NATIVE:
+ {
+ struct blkif_sring *sring;
+ sring = (struct blkif_sring *)blkif->blk_ring_area->addr;
+ BACK_RING_INIT(&blkif->blk_rings.native, sring, PAGE_SIZE);
+ break;
+ }
+ case BLKIF_PROTOCOL_X86_32:
+ {
+ struct blkif_x86_32_sring *sring_x86_32;
+ sring_x86_32 = (struct blkif_x86_32_sring *)blkif->blk_ring_area->addr;
+ BACK_RING_INIT(&blkif->blk_rings.x86_32, sring_x86_32, PAGE_SIZE);
+ break;
+ }
+ case BLKIF_PROTOCOL_X86_64:
+ {
+ struct blkif_x86_64_sring *sring_x86_64;
+ sring_x86_64 = (struct blkif_x86_64_sring *)blkif->blk_ring_area->addr;
+ BACK_RING_INIT(&blkif->blk_rings.x86_64, sring_x86_64, PAGE_SIZE);
+ break;
+ }
+ default:
+ BUG();
+ }
+
+ err = bind_interdomain_evtchn_to_irqhandler(
+ blkif->domid, evtchn, blkif_be_int, 0, "blkif-backend", blkif);
+ if (err < 0)
+ {
+ unmap_frontend_page(blkif);
+ free_vm_area(blkif->blk_ring_area);
+ blkif->blk_rings.common.sring = NULL;
+ return err;
+ }
+ blkif->irq = err;
+
+ return 0;
+}
+
+void blkif_disconnect(blkif_t *blkif)
+{
+ if (blkif->xenblkd) {
+ kthread_stop(blkif->xenblkd);
+ blkif->xenblkd = NULL;
+ }
+
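+ /*
+ * Drop the initial reference and wait for every in-flight request
+ * to drop its own; re-take it afterwards so blkif_free() can
+ * release it for good.
+ */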
+ atomic_dec(&blkif->refcnt);
+ wait_event(blkif->waiting_to_free, atomic_read(&blkif->refcnt) == 0);
+ atomic_inc(&blkif->refcnt);
+
+ if (blkif->irq) {
+ unbind_from_irqhandler(blkif->irq, blkif);
+ blkif->irq = 0;
+ }
+
+ if (blkif->blk_rings.common.sring) {
+ unmap_frontend_page(blkif);
+ free_vm_area(blkif->blk_ring_area);
+ blkif->blk_rings.common.sring = NULL;
+ }
+}
+
+void blkif_free(blkif_t *blkif)
+{
+ if (!atomic_dec_and_test(&blkif->refcnt))
+ BUG();
+ kmem_cache_free(blkif_cachep, blkif);
+}
+
+int __init blkif_interface_init(void)
+{
+ blkif_cachep = kmem_cache_create("blkif_cache", sizeof(blkif_t),
+ 0, 0, NULL);
+ if (!blkif_cachep)
+ return -ENOMEM;
+
+ return 0;
+}
diff --git a/drivers/xen/blkback/vbd.c b/drivers/xen/blkback/vbd.c
new file mode 100644
index 0000000..8c91a2f
--- /dev/null
+++ b/drivers/xen/blkback/vbd.c
@@ -0,0 +1,163 @@
+/******************************************************************************
+ * blkback/vbd.c
+ *
+ * Routines for managing virtual block devices (VBDs).
+ *
+ * Copyright (c) 2003-2005, Keir Fraser & Steve Hand
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation; or, when distributed
+ * separately from the Linux kernel or incorporated into other
+ * software packages, subject to the following license:
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "common.h"
+
+#define vbd_sz(_v) ((_v)->bdev->bd_part ? \
+ (_v)->bdev->bd_part->nr_sects : get_capacity((_v)->bdev->bd_disk))
+
+unsigned long long vbd_size(struct vbd *vbd)
+{
+ return vbd_sz(vbd);
+}
+
+unsigned int vbd_info(struct vbd *vbd)
+{
+ return vbd->type | (vbd->readonly?VDISK_READONLY:0);
+}
+
+unsigned long vbd_secsize(struct vbd *vbd)
+{
+ return bdev_logical_block_size(vbd->bdev);
+}
+
+int vbd_create(blkif_t *blkif, blkif_vdev_t handle, unsigned major,
+ unsigned minor, int readonly, int cdrom)
+{
+ struct vbd *vbd;
+ struct block_device *bdev;
+
+ vbd = &blkif->vbd;
+ vbd->handle = handle;
+ vbd->readonly = readonly;
+ vbd->type = 0;
+
+ vbd->pdevice = MKDEV(major, minor);
+
+ bdev = blkdev_get_by_dev(vbd->pdevice, vbd->readonly ?
+ FMODE_READ : FMODE_WRITE, NULL);
+
+ if (IS_ERR(bdev)) {
+ DPRINTK("vbd_create: device %08x could not be opened.\n",
+ vbd->pdevice);
+ return -ENOENT;
+ }
+
+ vbd->bdev = bdev;
+ vbd->size = vbd_size(vbd);
+
+ if (vbd->bdev->bd_disk == NULL) {
+ DPRINTK("vbd_create: device %08x doesn't exist.\n",
+ vbd->pdevice);
+ vbd_free(vbd);
+ return -ENOENT;
+ }
+
+ if (vbd->bdev->bd_disk->flags & GENHD_FL_CD || cdrom)
+ vbd->type |= VDISK_CDROM;
+ if (vbd->bdev->bd_disk->flags & GENHD_FL_REMOVABLE)
+ vbd->type |= VDISK_REMOVABLE;
+
+ DPRINTK("Successful creation of handle=%04x (dom=%u)\n",
+ handle, blkif->domid);
+ return 0;
+}
+
+void vbd_free(struct vbd *vbd)
+{
+ if (vbd->bdev)
+ blkdev_put(vbd->bdev, vbd->readonly ? FMODE_READ : FMODE_WRITE);
+ vbd->bdev = NULL;
+}
+
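+/*
+ * Check that the request fits within the VBD and honours its
+ * read-only setting, then fill in the physical device and bdev.
+ */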
+int vbd_translate(struct phys_req *req, blkif_t *blkif, int operation)
+{
+ struct vbd *vbd = &blkif->vbd;
+ int rc = -EACCES;
+
+ if ((operation != READ) && vbd->readonly)
+ goto out;
+
+ if (unlikely((req->sector_number + req->nr_sects) > vbd_sz(vbd)))
+ goto out;
+
+ req->dev = vbd->pdevice;
+ req->bdev = vbd->bdev;
+ rc = 0;
+
+ out:
+ return rc;
+}
+
+void vbd_resize(blkif_t *blkif)
+{
+ struct vbd *vbd = &blkif->vbd;
+ struct xenbus_transaction xbt;
+ int err;
+ struct xenbus_device *dev = blkback_xenbus(blkif->be);
+ unsigned long long new_size = vbd_size(vbd);
+
+ printk(KERN_INFO "VBD Resize: Domid: %d, Device: (%d, %d)\n",
+ blkif->domid, MAJOR(vbd->pdevice), MINOR(vbd->pdevice));
+ printk(KERN_INFO "VBD Resize: new size %Lu\n", new_size);
+ vbd->size = new_size;
+again:
+ err = xenbus_transaction_start(&xbt);
+ if (err) {
+ printk(KERN_WARNING "Error starting transaction");
+ return;
+ }
+ err = xenbus_printf(xbt, dev->nodename, "sectors", "%Lu",
+ vbd_size(vbd));
+ if (err) {
+ printk(KERN_WARNING "Error writing new size");
+ goto abort;
+ }
+ /*
+ * Write the current state; we will use this to synchronize
+ * the front-end. If the current state is "connected" the
+ * front-end will get the new size information online.
+ */
+ err = xenbus_printf(xbt, dev->nodename, "state", "%d", dev->state);
+ if (err) {
+ printk(KERN_WARNING "Error writing the state");
+ goto abort;
+ }
+
+ err = xenbus_transaction_end(xbt, 0);
+ if (err == -EAGAIN)
+ goto again;
+ if (err)
+ printk(KERN_WARNING "Error ending transaction");
+abort:
+ xenbus_transaction_end(xbt, 1);
+}
diff --git a/drivers/xen/blkback/xenbus.c b/drivers/xen/blkback/xenbus.c
new file mode 100644
index 0000000..031bc3d
--- /dev/null
+++ b/drivers/xen/blkback/xenbus.c
@@ -0,0 +1,559 @@
+/* Xenbus code for blkif backend
+ Copyright (C) 2005 Rusty Russell <rusty@rustcorp.com.au>
+ Copyright (C) 2005 XenSource Ltd
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+*/
+
+#include <stdarg.h>
+#include <linux/module.h>
+#include <linux/kthread.h>
+#include "common.h"
+
+#undef DPRINTK
+#define DPRINTK(fmt, args...) \
+ pr_debug("blkback/xenbus (%s:%d) " fmt ".\n", \
+ __FUNCTION__, __LINE__, ##args)
+
+struct backend_info
+{
+ struct xenbus_device *dev;
+ blkif_t *blkif;
+ struct xenbus_watch backend_watch;
+ unsigned major;
+ unsigned minor;
+ char *mode;
+};
+
+static void connect(struct backend_info *);
+static int connect_ring(struct backend_info *);
+static void backend_changed(struct xenbus_watch *, const char **,
+ unsigned int);
+
+struct xenbus_device *blkback_xenbus(struct backend_info *be)
+{
+ return be->dev;
+}
+
+static int blkback_name(blkif_t *blkif, char *buf)
+{
+ char *devpath, *devname;
+ struct xenbus_device *dev = blkif->be->dev;
+
+ devpath = xenbus_read(XBT_NIL, dev->nodename, "dev", NULL);
+ if (IS_ERR(devpath))
+ return PTR_ERR(devpath);
+
+ if ((devname = strstr(devpath, "/dev/")) != NULL)
+ devname += strlen("/dev/");
+ else
+ devname = devpath;
+
+ snprintf(buf, TASK_COMM_LEN, "blkback.%d.%s", blkif->domid, devname);
+ kfree(devpath);
+
+ return 0;
+}
+
+static void update_blkif_status(blkif_t *blkif)
+{
+ int err;
+ char name[TASK_COMM_LEN];
+
+ /* Not ready to connect? */
+ if (!blkif->irq || !blkif->vbd.bdev)
+ return;
+
+ /* Already connected? */
+ if (blkif->be->dev->state == XenbusStateConnected)
+ return;
+
+ /* Attempt to connect: exit if we fail to. */
+ connect(blkif->be);
+ if (blkif->be->dev->state != XenbusStateConnected)
+ return;
+
+ err = blkback_name(blkif, name);
+ if (err) {
+ xenbus_dev_error(blkif->be->dev, err, "get blkback dev name");
+ return;
+ }
+
+ err = filemap_write_and_wait(blkif->vbd.bdev->bd_inode->i_mapping);
+ if (err) {
+ xenbus_dev_error(blkif->be->dev, err, "block flush");
+ return;
+ }
+ invalidate_inode_pages2(blkif->vbd.bdev->bd_inode->i_mapping);
+
+ blkif->xenblkd = kthread_run(blkif_schedule, blkif, name);
+ if (IS_ERR(blkif->xenblkd)) {
+ err = PTR_ERR(blkif->xenblkd);
+ blkif->xenblkd = NULL;
+ xenbus_dev_error(blkif->be->dev, err, "start xenblkd");
+ }
+}
+
+
+/****************************************************************
+ * sysfs interface for VBD I/O requests
+ */
+
+#define VBD_SHOW(name, format, args...) \
+ static ssize_t show_##name(struct device *_dev, \
+ struct device_attribute *attr, \
+ char *buf) \
+ { \
+ struct xenbus_device *dev = to_xenbus_device(_dev); \
+ struct backend_info *be = dev_get_drvdata(&dev->dev); \
+ \
+ return sprintf(buf, format, ##args); \
+ } \
+ static DEVICE_ATTR(name, S_IRUGO, show_##name, NULL)
+
+VBD_SHOW(oo_req, "%d\n", be->blkif->st_oo_req);
+VBD_SHOW(rd_req, "%d\n", be->blkif->st_rd_req);
+VBD_SHOW(wr_req, "%d\n", be->blkif->st_wr_req);
+VBD_SHOW(br_req, "%d\n", be->blkif->st_br_req);
+VBD_SHOW(rd_sect, "%d\n", be->blkif->st_rd_sect);
+VBD_SHOW(wr_sect, "%d\n", be->blkif->st_wr_sect);
+
+static struct attribute *vbdstat_attrs[] = {
+ &dev_attr_oo_req.attr,
+ &dev_attr_rd_req.attr,
+ &dev_attr_wr_req.attr,
+ &dev_attr_br_req.attr,
+ &dev_attr_rd_sect.attr,
+ &dev_attr_wr_sect.attr,
+ NULL
+};
+
+static struct attribute_group vbdstat_group = {
+ .name = "statistics",
+ .attrs = vbdstat_attrs,
+};
+
+VBD_SHOW(physical_device, "%x:%x\n", be->major, be->minor);
+VBD_SHOW(mode, "%s\n", be->mode);
+
+int xenvbd_sysfs_addif(struct xenbus_device *dev)
+{
+ int error;
+
+ error = device_create_file(&dev->dev, &dev_attr_physical_device);
+ if (error)
+ goto fail1;
+
+ error = device_create_file(&dev->dev, &dev_attr_mode);
+ if (error)
+ goto fail2;
+
+ error = sysfs_create_group(&dev->dev.kobj, &vbdstat_group);
+ if (error)
+ goto fail3;
+
+ return 0;
+
+fail3: sysfs_remove_group(&dev->dev.kobj, &vbdstat_group);
+fail2: device_remove_file(&dev->dev, &dev_attr_mode);
+fail1: device_remove_file(&dev->dev, &dev_attr_physical_device);
+ return error;
+}
+
+void xenvbd_sysfs_delif(struct xenbus_device *dev)
+{
+ sysfs_remove_group(&dev->dev.kobj, &vbdstat_group);
+ device_remove_file(&dev->dev, &dev_attr_mode);
+ device_remove_file(&dev->dev, &dev_attr_physical_device);
+}
+
+static int blkback_remove(struct xenbus_device *dev)
+{
+ struct backend_info *be = dev_get_drvdata(&dev->dev);
+
+ DPRINTK("");
+
+ if (be->major || be->minor)
+ xenvbd_sysfs_delif(dev);
+
+ if (be->backend_watch.node) {
+ unregister_xenbus_watch(&be->backend_watch);
+ kfree(be->backend_watch.node);
+ be->backend_watch.node = NULL;
+ }
+
+ if (be->blkif) {
+ blkif_disconnect(be->blkif);
+ vbd_free(&be->blkif->vbd);
+ blkif_free(be->blkif);
+ be->blkif = NULL;
+ }
+
+ kfree(be);
+ dev_set_drvdata(&dev->dev, NULL);
+ return 0;
+}
+
+int blkback_barrier(struct xenbus_transaction xbt,
+ struct backend_info *be, int state)
+{
+ struct xenbus_device *dev = be->dev;
+ int err;
+
+ err = xenbus_printf(xbt, dev->nodename, "feature-barrier",
+ "%d", state);
+ if (err)
+ xenbus_dev_fatal(dev, err, "writing feature-barrier");
+
+ return err;
+}
+
+/**
+ * Entry point to this code when a new device is created. Allocate the basic
+ * structures, and watch the store waiting for the hotplug scripts to tell us
+ * the device's physical major and minor numbers. Switch to InitWait.
+ */
+static int blkback_probe(struct xenbus_device *dev,
+ const struct xenbus_device_id *id)
+{
+ int err;
+ struct backend_info *be = kzalloc(sizeof(struct backend_info),
+ GFP_KERNEL);
+ if (!be) {
+ xenbus_dev_fatal(dev, -ENOMEM,
+ "allocating backend structure");
+ return -ENOMEM;
+ }
+ be->dev = dev;
+ dev_set_drvdata(&dev->dev, be);
+
+ be->blkif = blkif_alloc(dev->otherend_id);
+ if (IS_ERR(be->blkif)) {
+ err = PTR_ERR(be->blkif);
+ be->blkif = NULL;
+ xenbus_dev_fatal(dev, err, "creating block interface");
+ goto fail;
+ }
+
+ /* setup back pointer */
+ be->blkif->be = be;
+
+ err = xenbus_watch_pathfmt(dev, &be->backend_watch, backend_changed,
+ "%s/%s", dev->nodename, "physical-device");
+ if (err)
+ goto fail;
+
+ err = xenbus_switch_state(dev, XenbusStateInitWait);
+ if (err)
+ goto fail;
+
+ return 0;
+
+fail:
+ DPRINTK("failed");
+ blkback_remove(dev);
+ return err;
+}
+
+
+/**
+ * Callback received when the hotplug scripts have placed the physical-device
+ * node. Read it and the mode node, and create a vbd. If the frontend is
+ * ready, connect.
+ */
+static void backend_changed(struct xenbus_watch *watch,
+ const char **vec, unsigned int len)
+{
+ int err;
+ unsigned major;
+ unsigned minor;
+ struct backend_info *be
+ = container_of(watch, struct backend_info, backend_watch);
+ struct xenbus_device *dev = be->dev;
+ int cdrom = 0;
+ char *device_type;
+
+ DPRINTK("");
+
+ err = xenbus_scanf(XBT_NIL, dev->nodename, "physical-device", "%x:%x",
+ &major, &minor);
+ if (XENBUS_EXIST_ERR(err)) {
+ /* Since this watch will fire once immediately after it is
+ registered, we expect this. Ignore it, and wait for the
+ hotplug scripts. */
+ return;
+ }
+ if (err != 2) {
+ xenbus_dev_fatal(dev, err, "reading physical-device");
+ return;
+ }
+
+ if ((be->major || be->minor) &&
+ ((be->major != major) || (be->minor != minor))) {
+ printk(KERN_WARNING
+ "blkback: changing physical device (from %x:%x to "
+ "%x:%x) not supported.\n", be->major, be->minor,
+ major, minor);
+ return;
+ }
+
+ be->mode = xenbus_read(XBT_NIL, dev->nodename, "mode", NULL);
+ if (IS_ERR(be->mode)) {
+ err = PTR_ERR(be->mode);
+ be->mode = NULL;
+ xenbus_dev_fatal(dev, err, "reading mode");
+ return;
+ }
+
+ device_type = xenbus_read(XBT_NIL, dev->otherend, "device-type", NULL);
+ if (!IS_ERR(device_type)) {
+ cdrom = strcmp(device_type, "cdrom") == 0;
+ kfree(device_type);
+ }
+
+ if (be->major == 0 && be->minor == 0) {
+ /* Front end dir is a number, which is used as the handle. */
+
+ char *p = strrchr(dev->otherend, '/') + 1;
+ long handle = simple_strtoul(p, NULL, 0);
+
+ be->major = major;
+ be->minor = minor;
+
+ err = vbd_create(be->blkif, handle, major, minor,
+ (NULL == strchr(be->mode, 'w')), cdrom);
+ if (err) {
+ be->major = be->minor = 0;
+ xenbus_dev_fatal(dev, err, "creating vbd structure");
+ return;
+ }
+
+ err = xenvbd_sysfs_addif(dev);
+ if (err) {
+ vbd_free(&be->blkif->vbd);
+ be->major = be->minor = 0;
+ xenbus_dev_fatal(dev, err, "creating sysfs entries");
+ return;
+ }
+
+ /* We're potentially connected now */
+ update_blkif_status(be->blkif);
+ }
+}
+
+
+/**
+ * Callback received when the frontend's state changes.
+ */
+static void frontend_changed(struct xenbus_device *dev,
+ enum xenbus_state frontend_state)
+{
+ struct backend_info *be = dev_get_drvdata(&dev->dev);
+ int err;
+
+ DPRINTK("%s", xenbus_strstate(frontend_state));
+
+ switch (frontend_state) {
+ case XenbusStateInitialising:
+ if (dev->state == XenbusStateClosed) {
+ printk(KERN_INFO "%s: %s: prepare for reconnect\n",
+ __FUNCTION__, dev->nodename);
+ xenbus_switch_state(dev, XenbusStateInitWait);
+ }
+ break;
+
+ case XenbusStateInitialised:
+ case XenbusStateConnected:
+ /* Ensure we connect even when two watches fire in
+ close succession and we miss the intermediate value
+ of frontend_state. */
+ if (dev->state == XenbusStateConnected)
+ break;
+
+ /* Enforce precondition before potential leak point.
+ * blkif_disconnect() is idempotent.
+ */
+ blkif_disconnect(be->blkif);
+
+ err = connect_ring(be);
+ if (err)
+ break;
+ update_blkif_status(be->blkif);
+ break;
+
+ case XenbusStateClosing:
+ blkif_disconnect(be->blkif);
+ xenbus_switch_state(dev, XenbusStateClosing);
+ break;
+
+ case XenbusStateClosed:
+ xenbus_switch_state(dev, XenbusStateClosed);
+ if (xenbus_dev_is_online(dev))
+ break;
+ /* fall through if not online */
+ case XenbusStateUnknown:
+ /* implies blkif_disconnect() via blkback_remove() */
+ device_unregister(&dev->dev);
+ break;
+
+ default:
+ xenbus_dev_fatal(dev, -EINVAL, "saw state %d at frontend",
+ frontend_state);
+ break;
+ }
+}
+
+
+/* ** Connection ** */
+
+
+/**
+ * Write the physical details regarding the block device to the store, and
+ * switch to Connected state.
+ */
+static void connect(struct backend_info *be)
+{
+ struct xenbus_transaction xbt;
+ int err;
+ struct xenbus_device *dev = be->dev;
+
+ DPRINTK("%s", dev->otherend);
+
+ /* Supply the information about the device the frontend needs */
+again:
+ err = xenbus_transaction_start(&xbt);
+ if (err) {
+ xenbus_dev_fatal(dev, err, "starting transaction");
+ return;
+ }
+
+ err = blkback_barrier(xbt, be, 1);
+ if (err)
+ goto abort;
+
+ err = xenbus_printf(xbt, dev->nodename, "sectors", "%llu",
+ vbd_size(&be->blkif->vbd));
+ if (err) {
+ xenbus_dev_fatal(dev, err, "writing %s/sectors",
+ dev->nodename);
+ goto abort;
+ }
+
+ /* FIXME: use a typename instead */
+ err = xenbus_printf(xbt, dev->nodename, "info", "%u",
+ vbd_info(&be->blkif->vbd));
+ if (err) {
+ xenbus_dev_fatal(dev, err, "writing %s/info",
+ dev->nodename);
+ goto abort;
+ }
+ err = xenbus_printf(xbt, dev->nodename, "sector-size", "%lu",
+ vbd_secsize(&be->blkif->vbd));
+ if (err) {
+ xenbus_dev_fatal(dev, err, "writing %s/sector-size",
+ dev->nodename);
+ goto abort;
+ }
+
+ err = xenbus_transaction_end(xbt, 0);
+ if (err == -EAGAIN)
+ goto again;
+ if (err)
+ xenbus_dev_fatal(dev, err, "ending transaction");
+
+ err = xenbus_switch_state(dev, XenbusStateConnected);
+ if (err)
+ xenbus_dev_fatal(dev, err, "switching to Connected state",
+ dev->nodename);
+
+ return;
+ abort:
+ xenbus_transaction_end(xbt, 1);
+}
+
+
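+/*
+ * Read the ring reference, event channel and ring protocol that the
+ * frontend published in xenstore, then map the shared ring.
+ */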
+static int connect_ring(struct backend_info *be)
+{
+ struct xenbus_device *dev = be->dev;
+ unsigned long ring_ref;
+ unsigned int evtchn;
+ char protocol[64] = "";
+ int err;
+
+ DPRINTK("%s", dev->otherend);
+
+ err = xenbus_gather(XBT_NIL, dev->otherend, "ring-ref", "%lu", &ring_ref,
+ "event-channel", "%u", &evtchn, NULL);
+ if (err) {
+ xenbus_dev_fatal(dev, err,
+ "reading %s/ring-ref and event-channel",
+ dev->otherend);
+ return err;
+ }
+
+ be->blkif->blk_protocol = BLKIF_PROTOCOL_NATIVE;
+ err = xenbus_gather(XBT_NIL, dev->otherend, "protocol",
+ "%63s", protocol, NULL);
+ if (err)
+ strcpy(protocol, "unspecified, assuming native");
+ else if (0 == strcmp(protocol, XEN_IO_PROTO_ABI_NATIVE))
+ be->blkif->blk_protocol = BLKIF_PROTOCOL_NATIVE;
+ else if (0 == strcmp(protocol, XEN_IO_PROTO_ABI_X86_32))
+ be->blkif->blk_protocol = BLKIF_PROTOCOL_X86_32;
+ else if (0 == strcmp(protocol, XEN_IO_PROTO_ABI_X86_64))
+ be->blkif->blk_protocol = BLKIF_PROTOCOL_X86_64;
+ else {
+ xenbus_dev_fatal(dev, err, "unknown fe protocol %s", protocol);
+ return -1;
+ }
+ printk(KERN_INFO
+ "blkback: ring-ref %ld, event-channel %d, protocol %d (%s)\n",
+ ring_ref, evtchn, be->blkif->blk_protocol, protocol);
+
+ /* Map the shared frame, irq etc. */
+ err = blkif_map(be->blkif, ring_ref, evtchn);
+ if (err) {
+ xenbus_dev_fatal(dev, err, "mapping ring-ref %lu port %u",
+ ring_ref, evtchn);
+ return err;
+ }
+
+ return 0;
+}
+
+
+/* ** Driver Registration ** */
+
+
+static const struct xenbus_device_id blkback_ids[] = {
+ { "vbd" },
+ { "" }
+};
+
+
+static struct xenbus_driver blkback = {
+ .name = "vbd",
+ .owner = THIS_MODULE,
+ .ids = blkback_ids,
+ .probe = blkback_probe,
+ .remove = blkback_remove,
+ .otherend_changed = frontend_changed
+};
+
+
+int blkif_xenbus_init(void)
+{
+ return xenbus_register_backend(&blkback);
+}
diff --git a/drivers/xen/events.c b/drivers/xen/events.c
index 7468147..8b7fc9a6 100644
--- a/drivers/xen/events.c
+++ b/drivers/xen/events.c
@@ -97,6 +97,7 @@ struct irq_info
unsigned short gsi;
unsigned char vector;
unsigned char flags;
+ uint16_t domid;
} pirq;
} u;
};
@@ -114,7 +115,7 @@ struct cpu_evtchn_s {
static __initdata struct cpu_evtchn_s init_evtchn_mask = {
.bits[0 ... (NR_EVENT_CHANNELS/BITS_PER_LONG)-1] = ~0ul,
};
-static struct cpu_evtchn_s *cpu_evtchn_mask_p = &init_evtchn_mask;
+static struct cpu_evtchn_s __refdata *cpu_evtchn_mask_p = &init_evtchn_mask;

static inline unsigned long *cpu_evtchn_mask(int cpu)
{
@@ -153,11 +154,13 @@ static struct irq_info mk_virq_info(unsigned short evtchn, unsigned short virq)
}

static struct irq_info mk_pirq_info(unsigned short evtchn, unsigned short pirq,
- unsigned short gsi, unsigned short vector)
+ unsigned short gsi, unsigned short vector,
+ domid_t domid)
{
return (struct irq_info) { .type = IRQT_PIRQ, .evtchn = evtchn,
.cpu = 0,
- .u.pirq = { .pirq = pirq, .gsi = gsi, .vector = vector } };
+ .u.pirq = { .pirq = pirq, .gsi = gsi,
+ .vector = vector, .domid = domid } };
}

/*
@@ -277,7 +280,7 @@ static void bind_evtchn_to_cpu(unsigned int chn, unsigned int cpu)

BUG_ON(irq == -1);
#ifdef CONFIG_SMP
- cpumask_copy(irq_to_desc(irq)->affinity, cpumask_of(cpu));
+ cpumask_copy(irq_to_desc(irq)->irq_data.affinity, cpumask_of(cpu));
#endif

clear_bit(chn, cpu_evtchn_mask(cpu_from_irq(irq)));
@@ -294,7 +297,7 @@ static void init_evtchn_cpu_bindings(void)

/* By default all event channels notify CPU#0. */
for_each_irq_desc(i, desc) {
- cpumask_copy(desc->affinity, cpumask_of(0));
+ cpumask_copy(desc->irq_data.affinity, cpumask_of(0));
}
#endif

@@ -376,81 +379,69 @@ static void unmask_evtchn(int port)
put_cpu();
}

-static int get_nr_hw_irqs(void)
+static int xen_allocate_irq_dynamic(void)
{
- int ret = 1;
+ int first = 0;
+ int irq;

#ifdef CONFIG_X86_IO_APIC
- ret = get_nr_irqs_gsi();
+ /*
+ * For an HVM guest or domain 0 which see "real" (emulated or
+ * actual respectively) GSIs we allocate dynamic IRQs
+ * e.g. those corresponding to event channels or MSIs
+ * etc. from the range above those "real" GSIs to avoid
+ * collisions.
+ */
+ if (xen_initial_domain() || xen_hvm_domain())
+ first = get_nr_irqs_gsi();
 #endif

- return ret;
-}
+retry:
+ irq = irq_alloc_desc_from(first, -1);

-static int find_unbound_pirq(int type)
-{
- int rc, i;
- struct physdev_get_free_pirq op_get_free_pirq;
- op_get_free_pirq.type = type;
+ if (irq == -ENOMEM && first > NR_IRQS_LEGACY) {
+ printk(KERN_ERR "Out of dynamic IRQ space and eating into GSI space. You should increase nr_irqs\n");
+ first = max(NR_IRQS_LEGACY, first - NR_IRQS_LEGACY);
+ goto retry;
+ }

- rc = HYPERVISOR_physdev_op(PHYSDEVOP_get_free_pirq, &op_get_free_pirq);
- if (!rc)
- return op_get_free_pirq.pirq;
+ if (irq < 0)
+ panic("No available IRQ to bind to: increase nr_irqs!\n");

- for (i = 0; i < nr_irqs; i++) {
- if (pirq_to_irq[i] < 0)
- return i;
- }
- return -1;
+ return irq;
 }

-static int find_unbound_irq(void)
+static int xen_allocate_irq_gsi(unsigned gsi)
 {
- struct irq_data *data;
- int irq, res;
- int bottom = get_nr_hw_irqs();
- int top = nr_irqs-1;
-
- if (bottom == nr_irqs)
- goto no_irqs;
+ int irq;

- /* This loop starts from the top of IRQ space and goes down.
- * We need this b/c if we have a PCI device in a Xen PV guest
- * we do not have an IO-APIC (though the backend might have them)
- * mapped in. To not have a collision of physical IRQs with the Xen
- * event channels start at the top of the IRQ space for virtual IRQs.
+ /*
+ * A PV guest has no concept of a GSI (since it has no ACPI
+ * nor access to/knowledge of the physical APICs). Therefore
+ * all IRQs are dynamically allocated from the entire IRQ
+ * space.
 */
- for (irq = top; irq > bottom; irq--) {
- data = irq_get_irq_data(irq);
- /* only 15->0 have init'd desc; handle irq > 16 */
- if (!data)
- break;
- if (data->chip == &no_irq_chip)
- break;
- if (data->chip != &xen_dynamic_chip)
- continue;
- if (irq_info[irq].type == IRQT_UNBOUND)
- return irq;
- }
+ if (xen_pv_domain() && !xen_initial_domain())
+ return xen_allocate_irq_dynamic();

- if (irq == bottom)
- goto no_irqs;
+ /* Legacy IRQ descriptors are already allocated by the arch. */
+ if (gsi < NR_IRQS_LEGACY)
+ return gsi;

- res = irq_alloc_desc_at(irq, -1);
-
- if (WARN_ON(res != irq))
- return -1;
+ irq = irq_alloc_desc_at(gsi, -1);
+ if (irq < 0)
+ panic("Unable to allocate to IRQ%d (%d)\n", gsi, irq);

 return irq;
-
-no_irqs:
- panic("No available IRQ to bind to: increase nr_irqs!\n");
 }

-static bool identity_mapped_irq(unsigned irq)
+static void xen_free_irq(unsigned irq)
 {
- /* identity map all the hardware irqs */
- return irq < get_nr_hw_irqs();
+ /* Legacy IRQ descriptors are managed by the arch. */
+ if (irq < NR_IRQS_LEGACY)
+ return;
+
+ irq_free_desc(irq);
 }

 static void pirq_unmask_notify(int irq)
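The comment in xen_allocate_irq_dynamic() above is the key design point: dom0 and HVM guests must keep dynamically allocated IRQs above the "real" GSIs, while a PV domU owns the whole IRQ space; the retry path then backs off in NR_IRQS_LEGACY-sized steps without ever descending below the legacy ISA descriptors. A hedged sketch of the same policy in isolation (the function name is illustrative):

	/* Sketch only: pick and reserve a descriptor for a dynamic IRQ.
	 * irq_alloc_desc_from(first, node) reserves the lowest free
	 * descriptor >= first, or returns a negative errno. */
	static int pick_dynamic_irq(void)
	{
		int first = 0;

		if (xen_initial_domain() || xen_hvm_domain())
			first = get_nr_irqs_gsi();	/* skip real GSIs */

		return irq_alloc_desc_from(first, -1 /* any node */);
	}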
@@ -486,7 +477,7 @@ static bool probing_irq(int irq)
 return desc && desc->action == NULL;
 }

-static unsigned int startup_pirq(unsigned int irq)
+static unsigned int __startup_pirq(unsigned int irq)
 {
 struct evtchn_bind_pirq bind_pirq;
 struct irq_info *info = info_for_irq(irq);
@@ -524,9 +515,15 @@ out:
 return 0;
 }

-static void shutdown_pirq(unsigned int irq)
+static unsigned int startup_pirq(struct irq_data *data)
+{
+ return __startup_pirq(data->irq);
+}
+
+static void shutdown_pirq(struct irq_data *data)
 {
 struct evtchn_close close;
+ unsigned int irq = data->irq;
 struct irq_info *info = info_for_irq(irq);
 int evtchn = evtchn_from_irq(irq);

@@ -546,20 +543,20 @@ static void shutdown_pirq(unsigned int irq)
 info->evtchn = 0;
 }

-static void enable_pirq(unsigned int irq)
+static void enable_pirq(struct irq_data *data)
 {
- startup_pirq(irq);
+ startup_pirq(data);
 }

-static void disable_pirq(unsigned int irq)
+static void disable_pirq(struct irq_data *data)
 {
 }

-static void ack_pirq(unsigned int irq)
+static void ack_pirq(struct irq_data *data)
 {
- int evtchn = evtchn_from_irq(irq);
+ int evtchn = evtchn_from_irq(data->irq);

- move_native_irq(irq);
+ move_native_irq(data->irq);

 if (VALID_EVTCHN(evtchn)) {
 mask_evtchn(evtchn);
@@ -567,23 +564,6 @@ static void ack_pirq(unsigned int irq)
 }
 }

-static void end_pirq(unsigned int irq)
-{
- int evtchn = evtchn_from_irq(irq);
- struct irq_desc *desc = irq_to_desc(irq);
-
- if (WARN_ON(!desc))
- return;
-
- if ((desc->status & (IRQ_DISABLED|IRQ_PENDING)) ==
- (IRQ_DISABLED|IRQ_PENDING)) {
- shutdown_pirq(irq);
- } else if (VALID_EVTCHN(evtchn)) {
- unmask_evtchn(evtchn);
- pirq_unmask_notify(irq);
- }
-}
-
 static int find_irq_by_gsi(unsigned gsi)
 {
 int irq;
@@ -638,14 +618,7 @@ int xen_map_pirq_gsi(unsigned pirq, unsigned gsi, int shareable, char *name)
 goto out; /* XXX need refcount? */
 }

- /* If we are a PV guest, we don't have GSIs (no ACPI passed). Therefore
- * we are using the !xen_initial_domain() to drop in the function.*/
- if (identity_mapped_irq(gsi) || (!xen_initial_domain() &&
- xen_pv_domain())) {
- irq = gsi;
- irq_alloc_desc_at(irq, -1);
- } else
- irq = find_unbound_irq();
+ irq = xen_allocate_irq_gsi(gsi);

 set_irq_chip_and_handler_name(irq, &xen_pirq_chip,
 handle_level_irq, name);
@@ -658,12 +631,12 @@ int xen_map_pirq_gsi(unsigned pirq, unsigned gsi, int shareable, char *name)
 * this in the priv domain. */
 if (xen_initial_domain() &&
 HYPERVISOR_physdev_op(PHYSDEVOP_alloc_irq_vector, &irq_op)) {
- irq_free_desc(irq);
+ xen_free_irq(irq);
 irq = -ENOSPC;
 goto out;
 }

- irq_info[irq] = mk_pirq_info(0, pirq, gsi, irq_op.vector);
+ irq_info[irq] = mk_pirq_info(0, pirq, gsi, irq_op.vector, DOMID_SELF);
 irq_info[irq].u.pirq.flags |= shareable ? PIRQ_SHAREABLE : 0;
 pirq_to_irq[pirq] = irq;

@@ -674,87 +647,47 @@ out:
 }

 #ifdef CONFIG_PCI_MSI
-#include <linux/msi.h>
-#include "../pci/msi.h"
-
-void xen_allocate_pirq_msi(char *name, int *irq, int *pirq, int alloc)
+int xen_allocate_pirq_msi(struct pci_dev *dev, struct msi_desc *msidesc)
 {
- spin_lock(&irq_mapping_update_lock);
-
- if (alloc & XEN_ALLOC_IRQ) {
- *irq = find_unbound_irq();
- if (*irq == -1)
- goto out;
- }
-
- if (alloc & XEN_ALLOC_PIRQ) {
- *pirq = find_unbound_pirq(MAP_PIRQ_TYPE_MSI);
- if (*pirq == -1)
- goto out;
- }
+ int rc;
+ struct physdev_get_free_pirq op_get_free_pirq;

- set_irq_chip_and_handler_name(*irq, &xen_pirq_chip,
- handle_level_irq, name);
+ op_get_free_pirq.type = MAP_PIRQ_TYPE_MSI;
+ rc = HYPERVISOR_physdev_op(PHYSDEVOP_get_free_pirq, &op_get_free_pirq);

- irq_info[*irq] = mk_pirq_info(0, *pirq, 0, 0);
- pirq_to_irq[*pirq] = *irq;
+ WARN_ONCE(rc == -ENOSYS,
+ "hypervisor does not support the PHYSDEVOP_get_free_pirq interface\n");

-out:
- spin_unlock(&irq_mapping_update_lock);
+ return rc ? -1 : op_get_free_pirq.pirq;
 }

-int xen_create_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int type)
+int xen_bind_pirq_msi_to_irq(struct pci_dev *dev, struct msi_desc *msidesc,
+ int pirq, int vector, const char *name,
+ domid_t domid)
 {
- int irq = -1;
- struct physdev_map_pirq map_irq;
- int rc;
- int pos;
- u32 table_offset, bir;
-
- memset(&map_irq, 0, sizeof(map_irq));
- map_irq.domid = DOMID_SELF;
- map_irq.type = MAP_PIRQ_TYPE_MSI;
- map_irq.index = -1;
- map_irq.pirq = -1;
- map_irq.bus = dev->bus->number;
- map_irq.devfn = dev->devfn;
-
- if (type == PCI_CAP_ID_MSIX) {
- pos = pci_find_capability(dev, PCI_CAP_ID_MSIX);
-
- pci_read_config_dword(dev, msix_table_offset_reg(pos),
- &table_offset);
- bir = (u8)(table_offset & PCI_MSIX_FLAGS_BIRMASK);
-
- map_irq.table_base = pci_resource_start(dev, bir);
- map_irq.entry_nr = msidesc->msi_attrib.entry_nr;
- }
+ int irq, ret;

 spin_lock(&irq_mapping_update_lock);

- irq = find_unbound_irq();
-
+ irq = xen_allocate_irq_dynamic();
 if (irq == -1)
 goto out;

- rc = HYPERVISOR_physdev_op(PHYSDEVOP_map_pirq, &map_irq);
- if (rc) {
- printk(KERN_WARNING "xen map irq failed %d\n", rc);
-
- irq_free_desc(irq);
-
- irq = -1;
- goto out;
- }
- irq_info[irq] = mk_pirq_info(0, map_irq.pirq, 0, map_irq.index);
-
 set_irq_chip_and_handler_name(irq, &xen_pirq_chip,
- handle_level_irq,
- (type == PCI_CAP_ID_MSIX) ? "msi-x":"msi");
+ handle_level_irq, name);

+ irq_info[irq] = mk_pirq_info(0, pirq, 0, vector, domid);
+ pirq_to_irq[pirq] = irq;
+ ret = set_irq_msi(irq, msidesc);
+ if (ret < 0)
+ goto error_irq;
 out:
 spin_unlock(&irq_mapping_update_lock);
 return irq;
+error_irq:
+ spin_unlock(&irq_mapping_update_lock);
+ xen_free_irq(irq);
+ return -1;
 }
 #endif

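The two new MSI helpers above split the old xen_create_msi_irq() into "ask Xen for a free pirq" and "bind that pirq to a Linux irq". A hedged sketch of how a caller combines them (the real caller lives in arch/x86/pci/xen.c, outside this diff; the function name below is illustrative):

	/* Sketch only: allocate a pirq from the hypervisor, then bind
	 * it to a freshly allocated dynamic irq for this MSI entry. */
	static int setup_msi_irq_sketch(struct pci_dev *dev,
					struct msi_desc *msidesc)
	{
		int irq, pirq = xen_allocate_pirq_msi(dev, msidesc);

		if (pirq < 0)
			return -ENODEV;

		irq = xen_bind_pirq_msi_to_irq(dev, msidesc, pirq,
					       0 /* vector */, "msi",
					       DOMID_SELF);
		return irq < 0 ? -ENODEV : irq;
	}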
@@ -773,17 +706,25 @@ int xen_destroy_irq(int irq)

 if (xen_initial_domain()) {
 unmap_irq.pirq = info->u.pirq.pirq;
- unmap_irq.domid = DOMID_SELF;
+ unmap_irq.domid = info->u.pirq.domid;
 rc = HYPERVISOR_physdev_op(PHYSDEVOP_unmap_pirq, &unmap_irq);
- if (rc) {
+ /* If another domain quits without making the pci_disable_msix
+ * call, the Xen hypervisor takes care of freeing the PIRQs
+ * (free_domain_pirqs).
+ */
+ if ((rc == -ESRCH && info->u.pirq.domid != DOMID_SELF))
+ printk(KERN_INFO "domain %d does not have %d anymore\n",
+ info->u.pirq.domid, info->u.pirq.pirq);
+ else if (rc) {
 printk(KERN_WARNING "unmap irq failed %d\n", rc);
 goto out;
 }
- pirq_to_irq[info->u.pirq.pirq] = -1;
 }
+ pirq_to_irq[info->u.pirq.pirq] = -1;
+
 irq_info[irq] = mk_unbound_info();

- irq_free_desc(irq);
+ xen_free_irq(irq);

 out:
 spin_unlock(&irq_mapping_update_lock);
@@ -805,6 +746,12 @@ int xen_irq_from_pirq(unsigned pirq)
 return pirq_to_irq[pirq];
 }

+
+int xen_pirq_from_irq(unsigned irq)
+{
+ return pirq_from_irq(irq);
+}
+EXPORT_SYMBOL_GPL(xen_pirq_from_irq);
 int bind_evtchn_to_irq(unsigned int evtchn)
 {
 int irq;
@@ -814,7 +761,7 @@ int bind_evtchn_to_irq(unsigned int evtchn)
 irq = evtchn_to_irq[evtchn];

 if (irq == -1) {
- irq = find_unbound_irq();
+ irq = xen_allocate_irq_dynamic();

 set_irq_chip_and_handler_name(irq, &xen_dynamic_chip,
 handle_fasteoi_irq, "event");
@@ -839,7 +786,7 @@ static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu)
 irq = per_cpu(ipi_to_irq, cpu)[ipi];

 if (irq == -1) {
- irq = find_unbound_irq();
+ irq = xen_allocate_irq_dynamic();
 if (irq < 0)
 goto out;

@@ -864,6 +811,21 @@ static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu)
 return irq;
 }

+static int bind_interdomain_evtchn_to_irq(unsigned int remote_domain,
+ unsigned int remote_port)
+{
+ struct evtchn_bind_interdomain bind_interdomain;
+ int err;
+
+ bind_interdomain.remote_dom = remote_domain;
+ bind_interdomain.remote_port = remote_port;
+
+ err = HYPERVISOR_event_channel_op(EVTCHNOP_bind_interdomain,
+ &bind_interdomain);
+
+ return err ? : bind_evtchn_to_irq(bind_interdomain.local_port);
+}
+

 int bind_virq_to_irq(unsigned int virq, unsigned int cpu)
 {
@@ -875,7 +837,7 @@ int bind_virq_to_irq(unsigned int virq, unsigned int cpu)
 irq = per_cpu(virq_to_irq, cpu)[virq];

 if (irq == -1) {
- irq = find_unbound_irq();
+ irq = xen_allocate_irq_dynamic();

 set_irq_chip_and_handler_name(irq, &xen_percpu_chip,
 handle_percpu_irq, "virq");
@@ -934,7 +896,7 @@ static void unbind_from_irq(unsigned int irq)
 if (irq_info[irq].type != IRQT_UNBOUND) {
 irq_info[irq] = mk_unbound_info();

- irq_free_desc(irq);
+ xen_free_irq(irq);
 }

 spin_unlock(&irq_mapping_update_lock);
@@ -959,6 +921,29 @@ int bind_evtchn_to_irqhandler(unsigned int evtchn,
 }
 EXPORT_SYMBOL_GPL(bind_evtchn_to_irqhandler);

+int bind_interdomain_evtchn_to_irqhandler(unsigned int remote_domain,
+ unsigned int remote_port,
+ irq_handler_t handler,
+ unsigned long irqflags,
+ const char *devname,
+ void *dev_id)
+{
+ int irq, retval;
+
+ irq = bind_interdomain_evtchn_to_irq(remote_domain, remote_port);
+ if (irq < 0)
+ return irq;
+
+ retval = request_irq(irq, handler, irqflags, devname, dev_id);
+ if (retval != 0) {
+ unbind_from_irq(irq);
+ return retval;
+ }
+
+ return irq;
+}
+EXPORT_SYMBOL_GPL(bind_interdomain_evtchn_to_irqhandler);
+
 int bind_virq_to_irqhandler(unsigned int virq, unsigned int cpu,
 irq_handler_t handler,
 unsigned long irqflags, const char *devname, void *dev_id)
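bind_interdomain_evtchn_to_irqhandler() is the piece that backend drivers such as the blkback code earlier in this patch rely on: it binds an event channel offered by a remote (frontend) domain and installs a handler in one call. A hedged usage sketch, with a placeholder handler name:

	/* Sketch only: attach a backend interrupt handler to the
	 * frontend's event channel. blkif_be_int is a placeholder. */
	static irqreturn_t blkif_be_int(int irq, void *dev_id)
	{
		/* wake the request-processing thread here */
		return IRQ_HANDLED;
	}

	static int attach_to_frontend(domid_t otherend_id,
				      unsigned int evtchn, void *blkif)
	{
		int irq = bind_interdomain_evtchn_to_irqhandler(otherend_id,
				evtchn, blkif_be_int, 0, "blkif-backend",
				blkif);
		return irq < 0 ? irq : 0;
	}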
@@ -990,7 +975,7 @@ int bind_ipi_to_irqhandler(enum ipi_vector ipi,
 if (irq < 0)
 return irq;

- irqflags |= IRQF_NO_SUSPEND;
+ irqflags |= IRQF_NO_SUSPEND | IRQF_FORCE_RESUME;
 retval = request_irq(irq, handler, irqflags, devname, dev_id);
 if (retval != 0) {
 unbind_from_irq(irq);
@@ -1234,11 +1219,12 @@ static int rebind_irq_to_cpu(unsigned irq, unsigned tcpu)
 return 0;
 }

-static int set_affinity_irq(unsigned irq, const struct cpumask *dest)
+static int set_affinity_irq(struct irq_data *data, const struct cpumask *dest,
+ bool force)
 {
 unsigned tcpu = cpumask_first(dest);

- return rebind_irq_to_cpu(irq, tcpu);
+ return rebind_irq_to_cpu(data->irq, tcpu);
 }

 int resend_irq_on_evtchn(unsigned int irq)
@@ -1257,35 +1243,35 @@ int resend_irq_on_evtchn(unsigned int irq)
 return 1;
 }

-static void enable_dynirq(unsigned int irq)
+static void enable_dynirq(struct irq_data *data)
 {
- int evtchn = evtchn_from_irq(irq);
+ int evtchn = evtchn_from_irq(data->irq);

 if (VALID_EVTCHN(evtchn))
 unmask_evtchn(evtchn);
 }

-static void disable_dynirq(unsigned int irq)
+static void disable_dynirq(struct irq_data *data)
 {
- int evtchn = evtchn_from_irq(irq);
+ int evtchn = evtchn_from_irq(data->irq);

 if (VALID_EVTCHN(evtchn))
 mask_evtchn(evtchn);
 }

-static void ack_dynirq(unsigned int irq)
+static void ack_dynirq(struct irq_data *data)
 {
- int evtchn = evtchn_from_irq(irq);
+ int evtchn = evtchn_from_irq(data->irq);

- move_masked_irq(irq);
+ move_masked_irq(data->irq);

 if (VALID_EVTCHN(evtchn))
 unmask_evtchn(evtchn);
 }

-static int retrigger_dynirq(unsigned int irq)
+static int retrigger_dynirq(struct irq_data *data)
 {
- int evtchn = evtchn_from_irq(irq);
+ int evtchn = evtchn_from_irq(data->irq);
 struct shared_info *sh = HYPERVISOR_shared_info;
 int ret = 0;

@@ -1334,7 +1320,7 @@ static void restore_cpu_pirqs(void)

 printk(KERN_DEBUG "xen: --> irq=%d, pirq=%d\n", irq, map_irq.pirq);

- startup_pirq(irq);
+ __startup_pirq(irq);
 }
 }

@@ -1442,10 +1428,21 @@ void xen_poll_irq(int irq)
 xen_poll_irq_timeout(irq, 0 /* no timeout */);
 }

+/* Check whether the IRQ line is shared with other guests. */
+int xen_test_irq_shared(int irq)
+{
+ struct irq_info *info = info_for_irq(irq);
+ struct physdev_irq_status_query irq_status = { .irq = info->u.pirq.pirq };
+
+ if (HYPERVISOR_physdev_op(PHYSDEVOP_irq_status_query, &irq_status))
+ return 0;
+ return !(irq_status.flags & XENIRQSTAT_shared);
+}
+EXPORT_SYMBOL_GPL(xen_test_irq_shared);
+
 void xen_irq_resume(void)
 {
 unsigned int cpu, irq, evtchn;
- struct irq_desc *desc;

 init_evtchn_cpu_bindings();

@@ -1465,66 +1462,48 @@ void xen_irq_resume(void)
 restore_cpu_ipis(cpu);
 }

- /*
- * Unmask any IRQF_NO_SUSPEND IRQs which are enabled. These
- * are not handled by the IRQ core.
- */
- for_each_irq_desc(irq, desc) {
- if (!desc->action || !(desc->action->flags & IRQF_NO_SUSPEND))
- continue;
- if (desc->status & IRQ_DISABLED)
- continue;
-
- evtchn = evtchn_from_irq(irq);
- if (evtchn == -1)
- continue;
-
- unmask_evtchn(evtchn);
- }
-
 restore_cpu_pirqs();
 }

 static struct irq_chip xen_dynamic_chip __read_mostly = {
- .name = "xen-dyn",
+ .name = "xen-dyn",

- .disable = disable_dynirq,
- .mask = disable_dynirq,
- .unmask = enable_dynirq,
+ .irq_disable = disable_dynirq,
+ .irq_mask = disable_dynirq,
+ .irq_unmask = enable_dynirq,

- .eoi = ack_dynirq,
- .set_affinity = set_affinity_irq,
- .retrigger = retrigger_dynirq,
+ .irq_eoi = ack_dynirq,
+ .irq_set_affinity = set_affinity_irq,
+ .irq_retrigger = retrigger_dynirq,
 };

 static struct irq_chip xen_pirq_chip __read_mostly = {
- .name = "xen-pirq",
+ .name = "xen-pirq",

- .startup = startup_pirq,
- .shutdown = shutdown_pirq,
+ .irq_startup = startup_pirq,
+ .irq_shutdown = shutdown_pirq,

- .enable = enable_pirq,
- .unmask = enable_pirq,
+ .irq_enable = enable_pirq,
+ .irq_unmask = enable_pirq,

- .disable = disable_pirq,
- .mask = disable_pirq,
+ .irq_disable = disable_pirq,
+ .irq_mask = disable_pirq,

- .ack = ack_pirq,
- .end = end_pirq,
+ .irq_ack = ack_pirq,

- .set_affinity = set_affinity_irq,
+ .irq_set_affinity = set_affinity_irq,

- .retrigger = retrigger_dynirq,
+ .irq_retrigger = retrigger_dynirq,
 };

 static struct irq_chip xen_percpu_chip __read_mostly = {
- .name = "xen-percpu",
+ .name = "xen-percpu",

- .disable = disable_dynirq,
- .mask = disable_dynirq,
- .unmask = enable_dynirq,
+ .irq_disable = disable_dynirq,
+ .irq_mask = disable_dynirq,
+ .irq_unmask = enable_dynirq,

- .ack = ack_dynirq,
+ .irq_ack = ack_dynirq,
 };

 int xen_set_callback_via(uint64_t via)
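The irq_chip blocks above complete the conversion to the struct irq_data based callbacks: each method now receives the irq wrapped in irq_data and recovers the number via data->irq instead of taking a bare unsigned int. A minimal sketch of the new callback shape, reusing the helpers from this file:

	/* Sketch only: the old style was void (*mask)(unsigned int irq);
	 * the new style threads struct irq_data through instead. */
	static void example_mask(struct irq_data *data)
	{
		int evtchn = evtchn_from_irq(data->irq);

		if (VALID_EVTCHN(evtchn))
			mask_evtchn(evtchn);
	}

	static struct irq_chip example_chip = {
		.name		= "xen-example",
		.irq_mask	= example_mask,
	};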
diff --git a/drivers/xen/gntalloc.c b/drivers/xen/gntalloc.c
new file mode 100644
index 0000000..a7ffdfe
--- /dev/null
+++ b/drivers/xen/gntalloc.c
@@ -0,0 +1,545 @@
+/******************************************************************************
+ * gntalloc.c
+ *
+ * Device for creating grant references (in user-space) that may be shared
+ * with other domains.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+/*
+ * This driver exists to allow userspace programs in Linux to allocate kernel
+ * memory that will later be shared with another domain. Without this device,
+ * Linux userspace programs cannot create grant references.
+ *
+ * How this stuff works:
+ * X -> granting a page to Y
+ * Y -> mapping the grant from X
+ *
+ * 1. X uses the gntalloc device to allocate a page of kernel memory, P.
+ * 2. X creates an entry in the grant table that says domid(Y) can access P.
+ * This is done without a hypercall unless the grant table needs expansion.
+ * 3. X gives the grant reference identifier, GREF, to Y.
+ * 4. Y maps the page, either directly into kernel memory for use in a backend
+ * driver, or via the gntdev device to map into the address space of an
+ * application running in Y. This is the first point at which Xen does any
+ * tracking of the page.
+ * 5. A program in X mmap()s a segment of the gntalloc device that corresponds
+ * to the shared page, and can now communicate with Y over the shared page.
+ *
+ *
+ * NOTE TO USERSPACE LIBRARIES:
+ * The grant allocation and mmap()ing are, naturally, two separate operations.
+ * You set up the sharing by calling the create ioctl() and then the mmap().
+ * Teardown requires munmap() and either close() or ioctl().
+ *
+ * WARNING: Since Xen does not allow a guest to forcibly end the use of a grant
+ * reference, this device can be used to consume kernel memory by leaving grant
+ * references mapped by another domain when an application exits. Therefore,
+ * there is a global limit on the number of pages that can be allocated. When
+ * all references to the page are unmapped, it will be freed during the next
+ * grant operation.
+ */
+
+#include <linux/atomic.h>
+#include <linux/module.h>
+#include <linux/miscdevice.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/fs.h>
+#include <linux/device.h>
+#include <linux/mm.h>
+#include <linux/uaccess.h>
+#include <linux/types.h>
+#include <linux/list.h>
+#include <linux/highmem.h>
+
+#include <xen/xen.h>
+#include <xen/page.h>
+#include <xen/grant_table.h>
+#include <xen/gntalloc.h>
+#include <xen/events.h>
+
+static int limit = 1024;
+module_param(limit, int, 0644);
+MODULE_PARM_DESC(limit, "Maximum number of grants that may be allocated by "
+ "the gntalloc device");
+
+static LIST_HEAD(gref_list);
+static DEFINE_SPINLOCK(gref_lock);
+static int gref_size;
+
+struct notify_info {
+ uint16_t pgoff:12; /* Bits 0-11: Offset of the byte to clear */
+ uint16_t flags:2; /* Bits 12-13: Unmap notification flags */
+ int event; /* Port (event channel) to notify */
+};
+
+/* Metadata on a grant reference. */
+struct gntalloc_gref {
+ struct list_head next_gref; /* list entry gref_list */
+ struct list_head next_file; /* list entry file->list, if open */
+ struct page *page; /* The shared page */
+ uint64_t file_index; /* File offset for mmap() */
+ unsigned int users; /* Use count - when zero, waiting on Xen */
+ grant_ref_t gref_id; /* The grant reference number */
+ struct notify_info notify; /* Unmap notification */
+};
+
+struct gntalloc_file_private_data {
+ struct list_head list;
+ uint64_t index;
+};
+
+static void __del_gref(struct gntalloc_gref *gref);
+
+static void do_cleanup(void)
+{
+ struct gntalloc_gref *gref, *n;
+ list_for_each_entry_safe(gref, n, &gref_list, next_gref) {
+ if (!gref->users)
+ __del_gref(gref);
+ }
+}
+
+static int add_grefs(struct ioctl_gntalloc_alloc_gref *op,
+ uint32_t *gref_ids, struct gntalloc_file_private_data *priv)
+{
+ int i, rc, readonly;
+ LIST_HEAD(queue_gref);
+ LIST_HEAD(queue_file);
+ struct gntalloc_gref *gref;
+
+ readonly = !(op->flags & GNTALLOC_FLAG_WRITABLE);
+ rc = -ENOMEM;
+ for (i = 0; i < op->count; i++) {
+ gref = kzalloc(sizeof(*gref), GFP_KERNEL);
+ if (!gref)
+ goto undo;
+ list_add_tail(&gref->next_gref, &queue_gref);
+ list_add_tail(&gref->next_file, &queue_file);
+ gref->users = 1;
+ gref->file_index = op->index + i * PAGE_SIZE;
+ gref->page = alloc_page(GFP_KERNEL|__GFP_ZERO);
+ if (!gref->page)
+ goto undo;
+
+ /* Grant foreign access to the page. */
+ gref->gref_id = gnttab_grant_foreign_access(op->domid,
+ pfn_to_mfn(page_to_pfn(gref->page)), readonly);
+ if (gref->gref_id < 0) {
+ rc = gref->gref_id;
+ goto undo;
+ }
+ gref_ids[i] = gref->gref_id;
+ }
+
+ /* Add to gref lists. */
+ spin_lock(&gref_lock);
+ list_splice_tail(&queue_gref, &gref_list);
+ list_splice_tail(&queue_file, &priv->list);
+ spin_unlock(&gref_lock);
+
+ return 0;
+
+undo:
+ spin_lock(&gref_lock);
+ gref_size -= (op->count - i);
+
+ list_for_each_entry(gref, &queue_file, next_file) {
+ /* __del_gref does not remove from queue_file */
+ __del_gref(gref);
+ }
+
+ /* It's possible for the target domain to map the just-allocated grant
+ * references by blindly guessing their IDs; if this is done, then
+ * __del_gref will leave them in the queue_gref list. They need to be
+ * added to the global list so that we can free them when they are no
+ * longer referenced.
+ */
+ if (unlikely(!list_empty(&queue_gref)))
+ list_splice_tail(&queue_gref, &gref_list);
+ spin_unlock(&gref_lock);
+ return rc;
+}
+
+static void __del_gref(struct gntalloc_gref *gref)
+{
+ if (gref->notify.flags & UNMAP_NOTIFY_CLEAR_BYTE) {
+ uint8_t *tmp = kmap(gref->page);
+ tmp[gref->notify.pgoff] = 0;
+ kunmap(gref->page);
+ }
+ if (gref->notify.flags & UNMAP_NOTIFY_SEND_EVENT)
+ notify_remote_via_evtchn(gref->notify.event);
+
+ gref->notify.flags = 0;
+
+ if (gref->gref_id > 0) {
+ if (gnttab_query_foreign_access(gref->gref_id))
+ return;
+
+ if (!gnttab_end_foreign_access_ref(gref->gref_id, 0))
+ return;
+ }
+
+ gref_size--;
+ list_del(&gref->next_gref);
+
+ if (gref->page)
+ __free_page(gref->page);
+
+ kfree(gref);
+}
+
+/* finds contiguous grant references in a file, returns the first */
+static struct gntalloc_gref *find_grefs(struct gntalloc_file_private_data *priv,
+ uint64_t index, uint32_t count)
+{
+ struct gntalloc_gref *rv = NULL, *gref;
+ list_for_each_entry(gref, &priv->list, next_file) {
+ if (gref->file_index == index && !rv)
+ rv = gref;
+ if (rv) {
+ if (gref->file_index != index)
+ return NULL;
+ index += PAGE_SIZE;
+ count--;
+ if (count == 0)
+ return rv;
+ }
+ }
+ return NULL;
+}
+
+/*
+ * -------------------------------------
+ * File operations.
+ * -------------------------------------
+ */
+static int gntalloc_open(struct inode *inode, struct file *filp)
+{
+ struct gntalloc_file_private_data *priv;
+
+ priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+ if (!priv)
+ goto out_nomem;
+ INIT_LIST_HEAD(&priv->list);
+
+ filp->private_data = priv;
+
+ pr_debug("%s: priv %p\n", __func__, priv);
+
+ return 0;
+
+out_nomem:
+ return -ENOMEM;
+}
+
+static int gntalloc_release(struct inode *inode, struct file *filp)
+{
+ struct gntalloc_file_private_data *priv = filp->private_data;
+ struct gntalloc_gref *gref;
+
+ pr_debug("%s: priv %p\n", __func__, priv);
+
+ spin_lock(&gref_lock);
+ while (!list_empty(&priv->list)) {
+ gref = list_entry(priv->list.next,
+ struct gntalloc_gref, next_file);
+ list_del(&gref->next_file);
+ gref->users--;
+ if (gref->users == 0)
+ __del_gref(gref);
+ }
+ kfree(priv);
+ spin_unlock(&gref_lock);
+
+ return 0;
+}
+
+static long gntalloc_ioctl_alloc(struct gntalloc_file_private_data *priv,
+ struct ioctl_gntalloc_alloc_gref __user *arg)
+{
+ int rc = 0;
+ struct ioctl_gntalloc_alloc_gref op;
+ uint32_t *gref_ids;
+
+ pr_debug("%s: priv %p\n", __func__, priv);
+
+ if (copy_from_user(&op, arg, sizeof(op))) {
+ rc = -EFAULT;
+ goto out;
+ }
+
+ gref_ids = kzalloc(sizeof(gref_ids[0]) * op.count, GFP_TEMPORARY);
+ if (!gref_ids) {
+ rc = -ENOMEM;
+ goto out;
+ }
+
+ spin_lock(&gref_lock);
+ /* Clean up pages that were at zero (local) users but were still mapped
+ * by remote domains. Since those pages count towards the limit that we
+ * are about to enforce, removing them here is a good idea.
+ */
+ do_cleanup();
+ if (gref_size + op.count > limit) {
+ spin_unlock(&gref_lock);
+ rc = -ENOSPC;
+ goto out_free;
+ }
+ gref_size += op.count;
+ op.index = priv->index;
+ priv->index += op.count * PAGE_SIZE;
+ spin_unlock(&gref_lock);
+
+ rc = add_grefs(&op, gref_ids, priv);
+ if (rc < 0)
+ goto out_free;
+
+ /* Once we finish add_grefs, it is unsafe to touch the new reference,
+ * since it is possible for a concurrent ioctl to remove it (by guessing
+ * its index). If the userspace application doesn't provide valid memory
+ * to write the IDs to, then it will need to close the file in order to
+ * release - which it will do by segfaulting when it tries to access the
+ * IDs to close them.
+ */
+ if (copy_to_user(arg, &op, sizeof(op))) {
+ rc = -EFAULT;
+ goto out_free;
+ }
+ if (copy_to_user(arg->gref_ids, gref_ids,
+ sizeof(gref_ids[0]) * op.count)) {
+ rc = -EFAULT;
+ goto out_free;
+ }
+
+out_free:
+ kfree(gref_ids);
+out:
+ return rc;
+}
+
+static long gntalloc_ioctl_dealloc(struct gntalloc_file_private_data *priv,
+ void __user *arg)
+{
+ int i, rc = 0;
+ struct ioctl_gntalloc_dealloc_gref op;
+ struct gntalloc_gref *gref, *n;
+
+ pr_debug("%s: priv %p\n", __func__, priv);
+
+ if (copy_from_user(&op, arg, sizeof(op))) {
+ rc = -EFAULT;
+ goto dealloc_grant_out;
+ }
+
+ spin_lock(&gref_lock);
+ gref = find_grefs(priv, op.index, op.count);
+ if (gref) {
+ /* Remove from the file list only, and decrease reference count.
+ * The later call to do_cleanup() will remove from gref_list and
+ * free the memory if the pages aren't mapped anywhere.
+ */
+ for (i = 0; i < op.count; i++) {
+ n = list_entry(gref->next_file.next,
+ struct gntalloc_gref, next_file);
+ list_del(&gref->next_file);
+ gref->users--;
+ gref = n;
+ }
+ } else {
+ rc = -EINVAL;
+ }
+
+ do_cleanup();
+
+ spin_unlock(&gref_lock);
+dealloc_grant_out:
+ return rc;
+}
+
+static long gntalloc_ioctl_unmap_notify(struct gntalloc_file_private_data *priv,
+ void __user *arg)
+{
+ struct ioctl_gntalloc_unmap_notify op;
+ struct gntalloc_gref *gref;
+ uint64_t index;
+ int pgoff;
+ int rc;
+
+ if (copy_from_user(&op, arg, sizeof(op)))
+ return -EFAULT;
+
+ index = op.index & ~(PAGE_SIZE - 1);
+ pgoff = op.index & (PAGE_SIZE - 1);
+
+ spin_lock(&gref_lock);
+
+ gref = find_grefs(priv, index, 1);
+ if (!gref) {
+ rc = -ENOENT;
+ goto unlock_out;
+ }
+
+ if (op.action & ~(UNMAP_NOTIFY_CLEAR_BYTE|UNMAP_NOTIFY_SEND_EVENT)) {
+ rc = -EINVAL;
+ goto unlock_out;
+ }
+
+ gref->notify.flags = op.action;
+ gref->notify.pgoff = pgoff;
+ gref->notify.event = op.event_channel_port;
+ rc = 0;
+ unlock_out:
+ spin_unlock(&gref_lock);
+ return rc;
+}
+
+static long gntalloc_ioctl(struct file *filp, unsigned int cmd,
+ unsigned long arg)
+{
+ struct gntalloc_file_private_data *priv = filp->private_data;
+
+ switch (cmd) {
+ case IOCTL_GNTALLOC_ALLOC_GREF:
+ return gntalloc_ioctl_alloc(priv, (void __user *)arg);
+
+ case IOCTL_GNTALLOC_DEALLOC_GREF:
+ return gntalloc_ioctl_dealloc(priv, (void __user *)arg);
+
+ case IOCTL_GNTALLOC_SET_UNMAP_NOTIFY:
+ return gntalloc_ioctl_unmap_notify(priv, (void __user *)arg);
+
+ default:
+ return -ENOIOCTLCMD;
+ }
+
+ return 0;
+}
+
+static void gntalloc_vma_close(struct vm_area_struct *vma)
+{
+ struct gntalloc_gref *gref = vma->vm_private_data;
+ if (!gref)
+ return;
+
+ spin_lock(&gref_lock);
+ gref->users--;
+ if (gref->users == 0)
+ __del_gref(gref);
+ spin_unlock(&gref_lock);
+}
+
+static struct vm_operations_struct gntalloc_vmops = {
+ .close = gntalloc_vma_close,
+};
+
+static int gntalloc_mmap(struct file *filp, struct vm_area_struct *vma)
+{
+ struct gntalloc_file_private_data *priv = filp->private_data;
+ struct gntalloc_gref *gref;
+ int count = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
+ int rv, i;
+
+ pr_debug("%s: priv %p, page %lu+%d\n", __func__,
+ priv, vma->vm_pgoff, count);
+
+ if (!(vma->vm_flags & VM_SHARED)) {
+ printk(KERN_ERR "%s: Mapping must be shared.\n", __func__);
+ return -EINVAL;
+ }
+
+ spin_lock(&gref_lock);
+ gref = find_grefs(priv, vma->vm_pgoff << PAGE_SHIFT, count);
+ if (gref == NULL) {
+ rv = -ENOENT;
+ pr_debug("%s: Could not find grant reference",
+ __func__);
+ goto out_unlock;
+ }
+
+ vma->vm_private_data = gref;
+
+ vma->vm_flags |= VM_RESERVED;
+ vma->vm_flags |= VM_DONTCOPY;
+ vma->vm_flags |= VM_PFNMAP | VM_PFN_AT_MMAP;
+
+ vma->vm_ops = &gntalloc_vmops;
+
+ for (i = 0; i < count; i++) {
+ gref->users++;
+ rv = vm_insert_page(vma, vma->vm_start + i * PAGE_SIZE,
+ gref->page);
+ if (rv)
+ goto out_unlock;
+
+ gref = list_entry(gref->next_file.next,
+ struct gntalloc_gref, next_file);
+ }
+ rv = 0;
+
+out_unlock:
+ spin_unlock(&gref_lock);
+ return rv;
+}
+
+static const struct file_operations gntalloc_fops = {
+ .owner = THIS_MODULE,
+ .open = gntalloc_open,
+ .release = gntalloc_release,
+ .unlocked_ioctl = gntalloc_ioctl,
+ .mmap = gntalloc_mmap
+};
+
+/*
+ * -------------------------------------
+ * Module creation/destruction.
+ * -------------------------------------
+ */
+static struct miscdevice gntalloc_miscdev = {
+ .minor = MISC_DYNAMIC_MINOR,
+ .name = "xen/gntalloc",
+ .fops = &gntalloc_fops,
+};
+
+static int __init gntalloc_init(void)
+{
+ int err;
+
+ if (!xen_domain())
+ return -ENODEV;
+
+ err = misc_register(&gntalloc_miscdev);
+ if (err != 0) {
+ printk(KERN_ERR "Could not register misc gntalloc device\n");
+ return err;
+ }
+
+ pr_debug("Created grant allocation device at %d,%d\n",
+ MISC_MAJOR, gntalloc_miscdev.minor);
+
+ return 0;
+}
+
+static void __exit gntalloc_exit(void)
+{
+ misc_deregister(&gntalloc_miscdev);
+}
+
+module_init(gntalloc_init);
+module_exit(gntalloc_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Carter Weatherly <carter.weatherly@jhuapl.edu>, "
+ "Daniel De Graaf <dgdegra@tycho.nsa.gov>");
+MODULE_DESCRIPTION("User-space grant reference allocator driver");
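The "How this stuff works" comment at the top of gntalloc.c maps onto a short userspace sequence: open the device, allocate grants with the ioctl, then mmap() the returned index. A hedged sketch (the device path and include path are illustrative; the struct fields match the ioctl handler above):

	/* Userspace sketch only: share one writable page with domain
	 * remote_domid; the returned gref is handed to the peer out of
	 * band. Teardown is munmap() plus close() or the dealloc
	 * ioctl, as the driver comment above says. */
	#include <fcntl.h>
	#include <stdint.h>
	#include <sys/ioctl.h>
	#include <sys/mman.h>
	#include <xen/gntalloc.h>	/* illustrative include path */

	static void *share_one_page(uint16_t remote_domid, uint32_t *gref_out)
	{
		int fd = open("/dev/xen/gntalloc", O_RDWR);
		struct ioctl_gntalloc_alloc_gref op = {
			.domid = remote_domid,
			.flags = GNTALLOC_FLAG_WRITABLE,
			.count = 1,
		};

		if (fd < 0 || ioctl(fd, IOCTL_GNTALLOC_ALLOC_GREF, &op))
			return NULL;

		*gref_out = op.gref_ids[0];
		return mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_SHARED,
			    fd, op.index);
	}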
diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c
index 1e31cdc..d43ff30 100644
--- a/drivers/xen/gntdev.c
+++ b/drivers/xen/gntdev.c
@@ -32,10 +32,12 @@
 #include <linux/sched.h>
 #include <linux/spinlock.h>
 #include <linux/slab.h>
+#include <linux/highmem.h>

 #include <xen/xen.h>
 #include <xen/grant_table.h>
 #include <xen/gntdev.h>
+#include <xen/events.h>
 #include <asm/xen/hypervisor.h>
 #include <asm/xen/hypercall.h>
 #include <asm/xen/page.h>
@@ -45,35 +47,46 @@ MODULE_AUTHOR("Derek G. Murray <Derek.Murray@cl.cam.ac.uk>, "
 "Gerd Hoffmann <kraxel@redhat.com>");
 MODULE_DESCRIPTION("User-space granted page access driver");

-static int limit = 1024;
+static int limit = 1024*1024;
 module_param(limit, int, 0644);
-MODULE_PARM_DESC(limit, "Maximum number of grants that may be mapped at "
- "once by a gntdev instance");
+MODULE_PARM_DESC(limit, "Maximum number of grants that may be mapped by "
+ "the gntdev device");
+
+static atomic_t pages_mapped = ATOMIC_INIT(0);
+
+static int use_ptemod;

 struct gntdev_priv {
 struct list_head maps;
- uint32_t used;
- uint32_t limit;
 /* lock protects maps from concurrent changes */
 spinlock_t lock;
 struct mm_struct *mm;
 struct mmu_notifier mn;
 };

+struct unmap_notify {
+ int flags;
+ /* Address relative to the start of the grant_map */
+ int addr;
+ int event;
+};
+
 struct grant_map {
 struct list_head next;
- struct gntdev_priv *priv;
 struct vm_area_struct *vma;
 int index;
 int count;
 int flags;
- int is_mapped;
+ atomic_t users;
+ struct unmap_notify notify;
 struct ioctl_gntdev_grant_ref *grants;
 struct gnttab_map_grant_ref *map_ops;
 struct gnttab_unmap_grant_ref *unmap_ops;
 struct page **pages;
 };

+static int unmap_grant_pages(struct grant_map *map, int offset, int pages);
+
 /* ------------------------------------------------------------------ */

 static void gntdev_print_maps(struct gntdev_priv *priv,
@@ -82,9 +95,7 @@ static void gntdev_print_maps(struct gntdev_priv *priv,
 #ifdef DEBUG
 struct grant_map *map;

- pr_debug("maps list (priv %p, usage %d/%d)\n",
- priv, priv->used, priv->limit);
-
+ pr_debug("%s: maps list (priv %p)\n", __func__, priv);
 list_for_each_entry(map, &priv->maps, next)
 pr_debug(" index %2d, count %2d %s\n",
 map->index, map->count,
@@ -115,14 +126,13 @@ static struct grant_map *gntdev_alloc_map(struct gntdev_priv *priv, int count)
 add->pages[i] = alloc_page(GFP_KERNEL | __GFP_HIGHMEM);
 if (add->pages[i] == NULL)
 goto err;
+ add->map_ops[i].handle = -1;
+ add->unmap_ops[i].handle = -1;
 }

 add->index = 0;
 add->count = count;
- add->priv = priv;
-
- if (add->count + priv->used > priv->limit)
- goto err;
+ atomic_set(&add->users, 1);

 return add;

@@ -154,7 +164,6 @@ static void gntdev_add_map(struct gntdev_priv *priv, struct grant_map *add)
 list_add_tail(&add->next, &priv->maps);

 done:
- priv->used += add->count;
 gntdev_print_maps(priv, "[new]", add->index);
 }

@@ -166,57 +175,57 @@ static struct grant_map *gntdev_find_map_index(struct gntdev_priv *priv,
 list_for_each_entry(map, &priv->maps, next) {
 if (map->index != index)
 continue;
- if (map->count != count)
- continue;
- return map;
- }
- return NULL;
-}
-
-static struct grant_map *gntdev_find_map_vaddr(struct gntdev_priv *priv,
- unsigned long vaddr)
-{
- struct grant_map *map;
-
- list_for_each_entry(map, &priv->maps, next) {
- if (!map->vma)
- continue;
- if (vaddr < map->vma->vm_start)
- continue;
- if (vaddr >= map->vma->vm_end)
+ if (count && map->count != count)
 continue;
 return map;
 }
 return NULL;
 }

-static int gntdev_del_map(struct grant_map *map)
+static void gntdev_put_map(struct grant_map *map)
 {
 int i;

- if (map->vma)
- return -EBUSY;
- for (i = 0; i < map->count; i++)
- if (map->unmap_ops[i].handle)
- return -EBUSY;
+ if (!map)
+ return;

- map->priv->used -= map->count;
- list_del(&map->next);
- return 0;
-}
+ if (!atomic_dec_and_test(&map->users))
+ return;

-static void gntdev_free_map(struct grant_map *map)
-{
- int i;
+ atomic_sub(map->count, &pages_mapped);

- if (!map)
- return;
+ if (map->notify.flags & UNMAP_NOTIFY_SEND_EVENT) {
+ notify_remote_via_evtchn(map->notify.event);
+ }
+
+ if (map->pages) {
+ if (!use_ptemod)
+ unmap_grant_pages(map, 0, map->count);

- if (map->pages)
 for (i = 0; i < map->count; i++) {
- if (map->pages[i])
+ uint32_t check, *tmp;
+ if (!map->pages[i])
+ continue;
+ /* XXX When unmapping in an HVM domain, Xen will
+ * sometimes end up mapping the GFN to an invalid MFN.
+ * In this case, writes will be discarded and reads will
+ * return all 0xFF bytes. Leak these unusable GFNs
+ * until Xen supports fixing their p2m mapping.
+ *
+ * Confirmed present in Xen 4.1-RC3 with HVM source
+ */
+ tmp = kmap(map->pages[i]);
+ *tmp = 0xdeaddead;
+ mb();
+ check = *tmp;
+ kunmap(map->pages[i]);
+ if (check == 0xdeaddead)
 __free_page(map->pages[i]);
+ else
+ pr_debug("Discard page %d=%ld\n", i,
+ page_to_pfn(map->pages[i]));
 }
+ }
 kfree(map->pages);
 kfree(map->grants);
 kfree(map->map_ops);
@@ -231,24 +240,39 @@ static int find_grant_ptes(pte_t *pte, pgtable_t token,
 {
 struct grant_map *map = data;
 unsigned int pgnr = (addr - map->vma->vm_start) >> PAGE_SHIFT;
+ int flags = map->flags | GNTMAP_application_map | GNTMAP_contains_pte;
 u64 pte_maddr;

 BUG_ON(pgnr >= map->count);
 pte_maddr = arbitrary_virt_to_machine(pte).maddr;

- gnttab_set_map_op(&map->map_ops[pgnr], pte_maddr,
- GNTMAP_contains_pte | map->flags,
+ gnttab_set_map_op(&map->map_ops[pgnr], pte_maddr, flags,
 map->grants[pgnr].ref,
 map->grants[pgnr].domid);
- gnttab_set_unmap_op(&map->unmap_ops[pgnr], pte_maddr,
- GNTMAP_contains_pte | map->flags,
- 0 /* handle */);
+ gnttab_set_unmap_op(&map->unmap_ops[pgnr], pte_maddr, flags,
+ -1 /* handle */);
 return 0;
 }

 static int map_grant_pages(struct grant_map *map)
 {
 int i, err = 0;
+ phys_addr_t addr;
+
+ if (!use_ptemod) {
+ /* Note: it could already be mapped */
+ if (map->map_ops[0].handle != -1)
+ return 0;
+ for (i = 0; i < map->count; i++) {
+ addr = (phys_addr_t)
+ pfn_to_kaddr(page_to_pfn(map->pages[i]));
+ gnttab_set_map_op(&map->map_ops[i], addr, map->flags,
+ map->grants[i].ref,
+ map->grants[i].domid);
+ gnttab_set_unmap_op(&map->unmap_ops[i], addr,
+ map->flags, -1 /* handle */);
+ }
+ }

 pr_debug("map %d+%d\n", map->index, map->count);
 err = gnttab_map_refs(map->map_ops, map->pages, map->count);
@@ -258,28 +282,81 @@ static int map_grant_pages(struct grant_map *map)
 for (i = 0; i < map->count; i++) {
 if (map->map_ops[i].status)
 err = -EINVAL;
- map->unmap_ops[i].handle = map->map_ops[i].handle;
+ else {
+ BUG_ON(map->map_ops[i].handle == -1);
+ map->unmap_ops[i].handle = map->map_ops[i].handle;
+ pr_debug("map handle=%d\n", map->map_ops[i].handle);
+ }
 }
 return err;
 }

-static int unmap_grant_pages(struct grant_map *map, int offset, int pages)
+static int __unmap_grant_pages(struct grant_map *map, int offset, int pages)
 {
 int i, err = 0;

- pr_debug("map %d+%d [%d+%d]\n", map->index, map->count, offset, pages);
- err = gnttab_unmap_refs(map->unmap_ops + offset, map->pages, pages);
+ if (map->notify.flags & UNMAP_NOTIFY_CLEAR_BYTE) {
+ int pgno = (map->notify.addr >> PAGE_SHIFT);
+ if (pgno >= offset && pgno < offset + pages && use_ptemod) {
+ void __user *tmp = (void __user *)
+ map->vma->vm_start + map->notify.addr;
+ err = copy_to_user(tmp, &err, 1);
+ if (err)
+ return err;
+ map->notify.flags &= ~UNMAP_NOTIFY_CLEAR_BYTE;
+ } else if (pgno >= offset && pgno < offset + pages) {
+ uint8_t *tmp = kmap(map->pages[pgno]);
+ tmp[map->notify.addr & (PAGE_SIZE-1)] = 0;
+ kunmap(map->pages[pgno]);
+ map->notify.flags &= ~UNMAP_NOTIFY_CLEAR_BYTE;
+ }
+ }
+
+ err = gnttab_unmap_refs(map->unmap_ops + offset, map->pages + offset, pages);
 if (err)
 return err;

 for (i = 0; i < pages; i++) {
 if (map->unmap_ops[offset+i].status)
 err = -EINVAL;
- map->unmap_ops[offset+i].handle = 0;
+ pr_debug("unmap handle=%d st=%d\n",
+ map->unmap_ops[offset+i].handle,
+ map->unmap_ops[offset+i].status);
+ map->unmap_ops[offset+i].handle = -1;
 }
 return err;
 }

+static int unmap_grant_pages(struct grant_map *map, int offset, int pages)
+{
+ int range, err = 0;
+
+ pr_debug("unmap %d+%d [%d+%d]\n", map->index, map->count, offset, pages);
+
+ /* It is possible the requested range will have a "hole" where we
+ * already unmapped some of the grants. Only unmap valid ranges.
+ */
+ while (pages && !err) {
+ while (pages && map->unmap_ops[offset].handle == -1) {
+ offset++;
+ pages--;
+ }
+ range = 0;
+ while (range < pages) {
+ if (map->unmap_ops[offset+range].handle == -1) {
+ range--;
+ break;
+ }
+ range++;
+ }
+ err = __unmap_grant_pages(map, offset, range);
+ offset += range;
+ pages -= range;
+ }
+
+ return err;
+}
+
 /* ------------------------------------------------------------------ */

 static void gntdev_vma_close(struct vm_area_struct *vma)
@@ -287,22 +364,13 @@ static void gntdev_vma_close(struct vm_area_struct *vma)
 struct grant_map *map = vma->vm_private_data;

 pr_debug("close %p\n", vma);
- map->is_mapped = 0;
 map->vma = NULL;
 vma->vm_private_data = NULL;
-}
-
-static int gntdev_vma_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
-{
- pr_debug("vaddr %p, pgoff %ld (shouldn't happen)\n",
- vmf->virtual_address, vmf->pgoff);
- vmf->flags = VM_FAULT_ERROR;
- return 0;
+ gntdev_put_map(map);
 }

 static struct vm_operations_struct gntdev_vmops = {
 .close = gntdev_vma_close,
- .fault = gntdev_vma_fault,
 };

 /* ------------------------------------------------------------------ */
@@ -320,8 +388,6 @@ static void mn_invl_range_start(struct mmu_notifier *mn,
 list_for_each_entry(map, &priv->maps, next) {
 if (!map->vma)
 continue;
- if (!map->is_mapped)
- continue;
 if (map->vma->vm_start >= end)
 continue;
 if (map->vma->vm_end <= start)
@@ -386,16 +452,17 @@ static int gntdev_open(struct inode *inode, struct file *flip)

 INIT_LIST_HEAD(&priv->maps);
 spin_lock_init(&priv->lock);
- priv->limit = limit;

- priv->mm = get_task_mm(current);
- if (!priv->mm) {
- kfree(priv);
- return -ENOMEM;
+ if (use_ptemod) {
+ priv->mm = get_task_mm(current);
+ if (!priv->mm) {
+ kfree(priv);
+ return -ENOMEM;
+ }
+ priv->mn.ops = &gntdev_mmu_ops;
+ ret = mmu_notifier_register(&priv->mn, priv->mm);
+ mmput(priv->mm);
 }
- priv->mn.ops = &gntdev_mmu_ops;
- ret = mmu_notifier_register(&priv->mn, priv->mm);
- mmput(priv->mm);

 if (ret) {
 kfree(priv);
@@ -412,21 +479,19 @@ static int gntdev_release(struct inode *inode, struct file *flip)
 {
 struct gntdev_priv *priv = flip->private_data;
 struct grant_map *map;
- int err;

 pr_debug("priv %p\n", priv);

 spin_lock(&priv->lock);
 while (!list_empty(&priv->maps)) {
 map = list_entry(priv->maps.next, struct grant_map, next);
- err = gntdev_del_map(map);
- if (WARN_ON(err))
- gntdev_free_map(map);
-
+ list_del(&map->next);
+ gntdev_put_map(map);
 }
 spin_unlock(&priv->lock);

- mmu_notifier_unregister(&priv->mn, priv->mm);
+ if (use_ptemod)
+ mmu_notifier_unregister(&priv->mn, priv->mm);
 kfree(priv);
 return 0;
 }
@@ -443,16 +508,21 @@ static long gntdev_ioctl_map_grant_ref(struct gntdev_priv *priv,
 pr_debug("priv %p, add %d\n", priv, op.count);
 if (unlikely(op.count <= 0))
 return -EINVAL;
- if (unlikely(op.count > priv->limit))
- return -EINVAL;

 err = -ENOMEM;
 map = gntdev_alloc_map(priv, op.count);
 if (!map)
 return err;
+
+ if (unlikely(atomic_add_return(op.count, &pages_mapped) > limit)) {
+ pr_debug("can't map: over limit\n");
+ gntdev_put_map(map);
+ return err;
+ }
+
 if (copy_from_user(map->grants, &u->refs,
 sizeof(map->grants[0]) * op.count) != 0) {
- gntdev_free_map(map);
+ gntdev_put_map(map);
 return err;
 }

@@ -461,13 +531,9 @@ static long gntdev_ioctl_map_grant_ref(struct gntdev_priv *priv,
 op.index = map->index << PAGE_SHIFT;
 spin_unlock(&priv->lock);

- if (copy_to_user(u, &op, sizeof(op)) != 0) {
- spin_lock(&priv->lock);
- gntdev_del_map(map);
- spin_unlock(&priv->lock);
- gntdev_free_map(map);
- return err;
- }
+ if (copy_to_user(u, &op, sizeof(op)) != 0)
+ return -EFAULT;
+
 return 0;
 }

@@ -484,11 +550,12 @@ static long gntdev_ioctl_unmap_grant_ref(struct gntdev_priv *priv,

 spin_lock(&priv->lock);
 map = gntdev_find_map_index(priv, op.index >> PAGE_SHIFT, op.count);
- if (map)
- err = gntdev_del_map(map);
+ if (map) {
+ list_del(&map->next);
+ gntdev_put_map(map);
+ err = 0;
+ }
 spin_unlock(&priv->lock);
- if (!err)
- gntdev_free_map(map);
 return err;
 }

@@ -496,43 +563,66 @@ static long gntdev_ioctl_get_offset_for_vaddr(struct gntdev_priv *priv,
 struct ioctl_gntdev_get_offset_for_vaddr __user *u)
 {
 struct ioctl_gntdev_get_offset_for_vaddr op;
+ struct vm_area_struct *vma;
 struct grant_map *map;

 if (copy_from_user(&op, u, sizeof(op)) != 0)
 return -EFAULT;
 pr_debug("priv %p, offset for vaddr %lx\n", priv, (unsigned long)op.vaddr);

- spin_lock(&priv->lock);
- map = gntdev_find_map_vaddr(priv, op.vaddr);
- if (map == NULL ||
- map->vma->vm_start != op.vaddr) {
- spin_unlock(&priv->lock);
+ vma = find_vma(current->mm, op.vaddr);
+ if (!vma || vma->vm_ops != &gntdev_vmops)
 return -EINVAL;
- }
+
+ map = vma->vm_private_data;
+ if (!map)
+ return -EINVAL;
+
 op.offset = map->index << PAGE_SHIFT;
 op.count = map->count;
- spin_unlock(&priv->lock);

 if (copy_to_user(u, &op, sizeof(op)) != 0)
 return -EFAULT;
 return 0;
 }

-static long gntdev_ioctl_set_max_grants(struct gntdev_priv *priv,
- struct ioctl_gntdev_set_max_grants __user *u)
+static long gntdev_ioctl_notify(struct gntdev_priv *priv, void __user *u)
 {
- struct ioctl_gntdev_set_max_grants op;
+ struct ioctl_gntdev_unmap_notify op;
+ struct grant_map *map;
+ int rc;

- if (copy_from_user(&op, u, sizeof(op)) != 0)
+ if (copy_from_user(&op, u, sizeof(op)))
 return -EFAULT;
- pr_debug("priv %p, limit %d\n", priv, op.count);
- if (op.count > limit)
- return -E2BIG;
+
+ if (op.action & ~(UNMAP_NOTIFY_CLEAR_BYTE|UNMAP_NOTIFY_SEND_EVENT))
+ return -EINVAL;

 spin_lock(&priv->lock);
- priv->limit = op.count;
+
+ list_for_each_entry(map, &priv->maps, next) {
+ uint64_t begin = map->index << PAGE_SHIFT;
+ uint64_t end = (map->index + map->count) << PAGE_SHIFT;
+ if (op.index >= begin && op.index < end)
+ goto found;
+ }
+ rc = -ENOENT;
+ goto unlock_out;
+
+ found:
+ if ((op.action & UNMAP_NOTIFY_CLEAR_BYTE) &&
+ (map->flags & GNTMAP_readonly)) {
+ rc = -EINVAL;
+ goto unlock_out;
+ }
+
+ map->notify.flags = op.action;
+ map->notify.addr = op.index - (map->index << PAGE_SHIFT);
+ map->notify.event = op.event_channel_port;
+ rc = 0;
+ unlock_out:
 spin_unlock(&priv->lock);
- return 0;
+ return rc;
 }

 static long gntdev_ioctl(struct file *flip,
@@ -551,8 +641,8 @@ static long gntdev_ioctl(struct file *flip,
 case IOCTL_GNTDEV_GET_OFFSET_FOR_VADDR:
 return gntdev_ioctl_get_offset_for_vaddr(priv, ptr);

- case IOCTL_GNTDEV_SET_MAX_GRANTS:
- return gntdev_ioctl_set_max_grants(priv, ptr);
+ case IOCTL_GNTDEV_SET_UNMAP_NOTIFY:
+ return gntdev_ioctl_notify(priv, ptr);

 default:
 pr_debug("priv %p, unknown cmd %x\n", priv, cmd);
@@ -568,7 +658,7 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma)
 int index = vma->vm_pgoff;
 int count = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
 struct grant_map *map;
- int err = -EINVAL;
+ int i, err = -EINVAL;

 if ((vma->vm_flags & VM_WRITE) && !(vma->vm_flags & VM_SHARED))
 return -EINVAL;
@@ -580,47 +670,70 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma)
 map = gntdev_find_map_index(priv, index, count);
 if (!map)
 goto unlock_out;
- if (map->vma)
+ if (use_ptemod && map->vma)
 goto unlock_out;
- if (priv->mm != vma->vm_mm) {
+ if (use_ptemod && priv->mm != vma->vm_mm) {
 printk(KERN_WARNING "Huh? Other mm?\n");
 goto unlock_out;
 }

+ atomic_inc(&map->users);
+
 vma->vm_ops = &gntdev_vmops;

 vma->vm_flags |= VM_RESERVED|VM_DONTCOPY|VM_DONTEXPAND|VM_PFNMAP;

 vma->vm_private_data = map;
- map->vma = vma;

- map->flags = GNTMAP_host_map | GNTMAP_application_map;
- if (!(vma->vm_flags & VM_WRITE))
- map->flags |= GNTMAP_readonly;
+ if (use_ptemod)
+ map->vma = vma;
+
+ if (map->flags) {
+ if ((vma->vm_flags & VM_WRITE) &&
+ (map->flags & GNTMAP_readonly))
+ return -EINVAL;
+ } else {
+ map->flags = GNTMAP_host_map;
+ if (!(vma->vm_flags & VM_WRITE))
+ map->flags |= GNTMAP_readonly;
+ }

 spin_unlock(&priv->lock);

- err = apply_to_page_range(vma->vm_mm, vma->vm_start,
- vma->vm_end - vma->vm_start,
- find_grant_ptes, map);
- if (err) {
- printk(KERN_WARNING "find_grant_ptes() failure.\n");
- return err;
+ if (use_ptemod) {
+ err = apply_to_page_range(vma->vm_mm, vma->vm_start,
+ vma->vm_end - vma->vm_start,
+ find_grant_ptes, map);
+ if (err) {
+ printk(KERN_WARNING "find_grant_ptes() failure.\n");
+ goto out_put_map;
+ }
 }

 err = map_grant_pages(map);
- if (err) {
- printk(KERN_WARNING "map_grant_pages() failure.\n");
- return err;
- }
+ if (err)
+ goto out_put_map;

- map->is_mapped = 1;
+ if (!use_ptemod) {
+ for (i = 0; i < count; i++) {
+ err = vm_insert_page(vma, vma->vm_start + i*PAGE_SIZE,
+ map->pages[i]);
+ if (err)
+ goto out_put_map;
+ }
+ }

 return 0;

 unlock_out:
 spin_unlock(&priv->lock);
 return err;
+
+out_put_map:
+ if (use_ptemod)
+ map->vma = NULL;
+ gntdev_put_map(map);
+ return err;
 }

 static const struct file_operations gntdev_fops = {
@@ -646,6 +759,8 @@ static int __init gntdev_init(void)
 if (!xen_domain())
 return -ENODEV;

+ use_ptemod = xen_pv_domain();
+
 err = misc_register(&gntdev_miscdev);
 if (err != 0) {
 printk(KERN_ERR "Could not register gntdev device\n");
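gntdev is the consuming side of the gntalloc sketch above: a userspace process maps a remote domain's grant and may arm an unmap notification, mirroring gntdev_ioctl_notify() in this diff. A hedged sketch (device path and include path are illustrative; the fields match the handlers above):

	/* Userspace sketch only: map one grant ref from remote_domid
	 * and ask the driver to clear byte 0 of the page when the
	 * mapping finally goes away. */
	#include <fcntl.h>
	#include <stdint.h>
	#include <sys/ioctl.h>
	#include <sys/mman.h>
	#include <xen/gntdev.h>	/* illustrative include path */

	static void *map_one_grant(uint32_t remote_domid, uint32_t gref)
	{
		int fd = open("/dev/xen/gntdev", O_RDWR);
		struct ioctl_gntdev_map_grant_ref op = {
			.count = 1,
			.refs[0] = { .domid = remote_domid, .ref = gref },
		};
		struct ioctl_gntdev_unmap_notify notify;

		if (fd < 0 || ioctl(fd, IOCTL_GNTDEV_MAP_GRANT_REF, &op))
			return NULL;

		notify.index = op.index;
		notify.action = UNMAP_NOTIFY_CLEAR_BYTE;
		notify.event_channel_port = 0;
		ioctl(fd, IOCTL_GNTDEV_SET_UNMAP_NOTIFY, &notify);

		return mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_SHARED,
			    fd, op.index);
	}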
diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c
index 9ef54eb..1a9bc2b 100644
--- a/drivers/xen/grant-table.c
+++ b/drivers/xen/grant-table.c
@@ -458,14 +458,19 @@ int gnttab_map_refs(struct gnttab_map_grant_ref *map_ops,
 	if (ret)
 		return ret;

+	if (xen_feature(XENFEAT_auto_translated_physmap))
+		return ret;
+
 	for (i = 0; i < count; i++) {
-		/* m2p override only supported for GNTMAP_contains_pte mappings */
-		if (!(map_ops[i].flags & GNTMAP_contains_pte))
-			continue;
-		pte = (pte_t *) (mfn_to_virt(PFN_DOWN(map_ops[i].host_addr)) +
+		if (map_ops[i].flags & GNTMAP_contains_pte) {
+			pte = (pte_t *) (mfn_to_virt(PFN_DOWN(map_ops[i].host_addr)) +
 				(map_ops[i].host_addr & ~PAGE_MASK));
-		mfn = pte_mfn(*pte);
-		ret = m2p_add_override(mfn, pages[i]);
+			mfn = pte_mfn(*pte);
+		} else {
+			mfn = PFN_DOWN(map_ops[i].dev_bus_addr);
+		}
+		ret = m2p_add_override(mfn, pages[i],
+				       map_ops[i].flags & GNTMAP_contains_pte);
 		if (ret)
 			return ret;
 	}
@@ -483,8 +488,13 @@ int gnttab_unmap_refs(struct gnttab_unmap_grant_ref *unmap_ops,
 	if (ret)
 		return ret;

+	if (xen_feature(XENFEAT_auto_translated_physmap))
+		return ret;
+
 	for (i = 0; i < count; i++) {
-		ret = m2p_remove_override(pages[i]);
+		/* We do not have the means of checking if GNTMAP_contains_pte
+		 * is set. */
+		ret = m2p_remove_override(pages[i], true /* clear the PTE */);
 		if (ret)
 			return ret;
 	}
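For reference, the map ops consumed above are typically built with the gnttab_set_map_op() helper from include/xen/grant_table.h; a minimal kernel-side sketch (the function map_foreign_page() and its surrounding driver plumbing are illustrative, not part of this patch):

    #include <xen/grant_table.h>

    /* Map a single foreign grant at a kernel address; on a PV guest the
     * GNTMAP_host_map path above then inserts an m2p override for the
     * backing page so mfn_to_pfn() keeps working on it. */
    static int map_foreign_page(void *vaddr, grant_ref_t ref, domid_t otherend,
    			    struct page *page, grant_handle_t *handle)
    {
    	struct gnttab_map_grant_ref op;
    	int err;

    	gnttab_set_map_op(&op, (unsigned long)vaddr, GNTMAP_host_map,
    			  ref, otherend);
    	err = gnttab_map_refs(&op, &page, 1);
    	if (err || op.status != GNTST_okay)
    		return err ? err : -EINVAL;
    	*handle = op.handle;
    	return 0;
    }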
diff --git a/drivers/xen/manage.c b/drivers/xen/manage.c
index 2417727..ebb2928 100644
--- a/drivers/xen/manage.c
+++ b/drivers/xen/manage.c
@@ -34,42 +34,38 @@ enum shutdown_state {
 /* Ignore multiple shutdown requests. */
 static enum shutdown_state shutting_down = SHUTDOWN_INVALID;

-#ifdef CONFIG_PM_SLEEP
-static int xen_hvm_suspend(void *data)
-{
-	int err;
-	struct sched_shutdown r = { .reason = SHUTDOWN_suspend };
-	int *cancelled = data;
-
-	BUG_ON(!irqs_disabled());
-
-	err = sysdev_suspend(PMSG_SUSPEND);
-	if (err) {
-		printk(KERN_ERR "xen_hvm_suspend: sysdev_suspend failed: %d\n",
-		       err);
-		return err;
-	}
-
-	*cancelled = HYPERVISOR_sched_op(SCHEDOP_shutdown, &r);
+struct suspend_info {
+	int cancelled;
+	unsigned long arg; /* extra hypercall argument */
+	void (*pre)(void);
+	void (*post)(int cancelled);
+};

-	xen_hvm_post_suspend(*cancelled);
+static void xen_hvm_post_suspend(int cancelled)
+{
+	xen_arch_hvm_post_suspend(cancelled);
 	gnttab_resume();
+}

-	if (!*cancelled) {
-		xen_irq_resume();
-		xen_console_resume();
-		xen_timer_resume();
-	}
-
-	sysdev_resume();
+static void xen_pre_suspend(void)
+{
+	xen_mm_pin_all();
+	gnttab_suspend();
+	xen_arch_pre_suspend();
+}

-	return 0;
+static void xen_post_suspend(int cancelled)
+{
+	xen_arch_post_suspend(cancelled);
+	gnttab_resume();
+	xen_mm_unpin_all();
 }

+#ifdef CONFIG_PM_SLEEP
 static int xen_suspend(void *data)
 {
+	struct suspend_info *si = data;
 	int err;
-	int *cancelled = data;

 	BUG_ON(!irqs_disabled());

@@ -80,22 +76,20 @@ static int xen_suspend(void *data)
 		return err;
 	}

-	xen_mm_pin_all();
-	gnttab_suspend();
-	xen_pre_suspend();
+	if (si->pre)
+		si->pre();

 	/*
 	 * This hypercall returns 1 if suspend was cancelled
 	 * or the domain was merely checkpointed, and 0 if it
 	 * is resuming in a new domain.
 	 */
-	*cancelled = HYPERVISOR_suspend(virt_to_mfn(xen_start_info));
+	si->cancelled = HYPERVISOR_suspend(si->arg);

-	xen_post_suspend(*cancelled);
-	gnttab_resume();
-	xen_mm_unpin_all();
+	if (si->post)
+		si->post(si->cancelled);

-	if (!*cancelled) {
+	if (!si->cancelled) {
 		xen_irq_resume();
 		xen_console_resume();
 		xen_timer_resume();
@@ -109,7 +103,7 @@ static int xen_suspend(void *data)
 static void do_suspend(void)
 {
 	int err;
-	int cancelled = 1;
+	struct suspend_info si;

 	shutting_down = SHUTDOWN_SUSPEND;

@@ -139,20 +133,29 @@ static void do_suspend(void)
 		goto out_resume;
 	}

-	if (xen_hvm_domain())
-		err = stop_machine(xen_hvm_suspend, &cancelled, cpumask_of(0));
-	else
-		err = stop_machine(xen_suspend, &cancelled, cpumask_of(0));
+	si.cancelled = 1;
+
+	if (xen_hvm_domain()) {
+		si.arg = 0UL;
+		si.pre = NULL;
+		si.post = &xen_hvm_post_suspend;
+	} else {
+		si.arg = virt_to_mfn(xen_start_info);
+		si.pre = &xen_pre_suspend;
+		si.post = &xen_post_suspend;
+	}
+
+	err = stop_machine(xen_suspend, &si, cpumask_of(0));

 	dpm_resume_noirq(PMSG_RESUME);

 	if (err) {
 		printk(KERN_ERR "failed to start xen_suspend: %d\n", err);
-		cancelled = 1;
+		si.cancelled = 1;
 	}

 out_resume:
-	if (!cancelled) {
+	if (!si.cancelled) {
 		xen_arch_resume();
 		xs_resume();
 	} else
@@ -172,12 +175,39 @@ out:
 }
 #endif /* CONFIG_PM_SLEEP */

+struct shutdown_handler {
+	const char *command;
+	void (*cb)(void);
+};
+
+static void do_poweroff(void)
+{
+	shutting_down = SHUTDOWN_POWEROFF;
+	orderly_poweroff(false);
+}
+
+static void do_reboot(void)
+{
+	shutting_down = SHUTDOWN_POWEROFF; /* ? */
+	ctrl_alt_del();
+}
+
 static void shutdown_handler(struct xenbus_watch *watch,
 			     const char **vec, unsigned int len)
 {
 	char *str;
 	struct xenbus_transaction xbt;
 	int err;
+	static struct shutdown_handler handlers[] = {
+		{ "poweroff",	do_poweroff },
+		{ "halt",	do_poweroff },
+		{ "reboot",	do_reboot   },
+#ifdef CONFIG_PM_SLEEP
+		{ "suspend",	do_suspend  },
+#endif
+		{NULL, NULL},
+	};
+	static struct shutdown_handler *handler;

 	if (shutting_down != SHUTDOWN_INVALID)
 		return;
@@ -194,7 +224,14 @@ static void shutdown_handler(struct xenbus_watch *watch,
 		return;
 	}

-	xenbus_write(xbt, "control", "shutdown", "");
+	for (handler = &handlers[0]; handler->command; handler++) {
+		if (strcmp(str, handler->command) == 0)
+			break;
+	}
+
+	/* Only acknowledge commands which we are prepared to handle. */
+	if (handler->cb)
+		xenbus_write(xbt, "control", "shutdown", "");

 	err = xenbus_transaction_end(xbt, 0);
 	if (err == -EAGAIN) {
@@ -202,17 +239,8 @@ static void shutdown_handler(struct xenbus_watch *watch,
 		goto again;
 	}

-	if (strcmp(str, "poweroff") == 0 ||
-	    strcmp(str, "halt") == 0) {
-		shutting_down = SHUTDOWN_POWEROFF;
-		orderly_poweroff(false);
-	} else if (strcmp(str, "reboot") == 0) {
-		shutting_down = SHUTDOWN_POWEROFF; /* ? */
-		ctrl_alt_del();
-#ifdef CONFIG_PM_SLEEP
-	} else if (strcmp(str, "suspend") == 0) {
-		do_suspend();
-#endif
+	if (handler->cb) {
+		handler->cb();
 	} else {
 		printk(KERN_INFO "Ignoring shutdown request: %s\n", str);
 		shutting_down = SHUTDOWN_INVALID;
@@ -291,27 +319,18 @@ static int shutdown_event(struct notifier_block *notifier,
 	return NOTIFY_DONE;
 }

-static int __init __setup_shutdown_event(void)
-{
-	/* Delay initialization in the PV on HVM case */
-	if (xen_hvm_domain())
-		return 0;
-
-	if (!xen_pv_domain())
-		return -ENODEV;
-
-	return xen_setup_shutdown_event();
-}
-
 int xen_setup_shutdown_event(void)
 {
 	static struct notifier_block xenstore_notifier = {
 		.notifier_call = shutdown_event
 	};
+
+	if (!xen_domain())
+		return -ENODEV;
 	register_xenstore_notifier(&xenstore_notifier);

 	return 0;
 }
 EXPORT_SYMBOL_GPL(xen_setup_shutdown_event);

-subsys_initcall(__setup_shutdown_event);
+subsys_initcall(xen_setup_shutdown_event);
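One benefit of the table-driven dispatch introduced above is that a new control command becomes a one-line table entry; a sketch with a hypothetical "hibernate" command (do_hibernate() is illustrative, not part of this patch):

    static void do_hibernate(void)
    {
    	/* hypothetical handler body */
    }

    static struct shutdown_handler handlers[] = {
    	{ "poweroff",	do_poweroff },
    	{ "halt",	do_poweroff },
    	{ "reboot",	do_reboot   },
    	{ "hibernate",	do_hibernate },	/* new command: one entry suffices */
    	{NULL, NULL},
    };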
diff --git a/drivers/xen/pciback/Makefile b/drivers/xen/pciback/Makefile
new file mode 100644
index 0000000..38bc123
--- /dev/null
+++ b/drivers/xen/pciback/Makefile
@@ -0,0 +1,17 @@
+obj-$(CONFIG_XEN_PCIDEV_BACKEND) += xen-pciback.o
+
+xen-pciback-y := pci_stub.o pciback_ops.o xenbus.o
+xen-pciback-y += conf_space.o conf_space_header.o \
+		 conf_space_capability.o \
+		 conf_space_capability_vpd.o \
+		 conf_space_capability_pm.o \
+		 conf_space_quirks.o
+xen-pciback-$(CONFIG_PCI_MSI) += conf_space_capability_msi.o
+xen-pciback-$(CONFIG_XEN_PCIDEV_BACKEND_VPCI) += vpci.o
+xen-pciback-$(CONFIG_XEN_PCIDEV_BACKEND_SLOT) += slot.o
+xen-pciback-$(CONFIG_XEN_PCIDEV_BACKEND_PASS) += passthrough.o
+xen-pciback-$(CONFIG_XEN_PCIDEV_BACKEND_CONTROLLER) += controller.o
+
+ifeq ($(CONFIG_XEN_PCIDEV_BE_DEBUG),y)
+EXTRA_CFLAGS += -DDEBUG
+endif
diff --git a/drivers/xen/pciback/conf_space.c b/drivers/xen/pciback/conf_space.c
new file mode 100644
index 0000000..eb6bba0
--- /dev/null
+++ b/drivers/xen/pciback/conf_space.c
@@ -0,0 +1,435 @@
+/*
+ * PCI Backend - Functions for creating a virtual configuration space for
+ * exported PCI Devices.
+ * It's dangerous to allow PCI Driver Domains to change their
+ * device's resources (memory, i/o ports, interrupts). We need to
+ * restrict changes to certain PCI Configuration registers:
+ * BARs, INTERRUPT_PIN, most registers in the header...
+ *
+ * Author: Ryan Wilson <hap9@epoch.ncsc.mil>
+ */
+
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include "pciback.h"
+#include "conf_space.h"
+#include "conf_space_quirks.h"
+
+static int permissive;
+module_param(permissive, bool, 0644);
+
+#define DEFINE_PCI_CONFIG(op, size, type)			\
+int pciback_##op##_config_##size				\
+(struct pci_dev *dev, int offset, type value, void *data)	\
+{								\
+	return pci_##op##_config_##size(dev, offset, value);	\
+}
+
+DEFINE_PCI_CONFIG(read, byte, u8 *)
+DEFINE_PCI_CONFIG(read, word, u16 *)
+DEFINE_PCI_CONFIG(read, dword, u32 *)
+
+DEFINE_PCI_CONFIG(write, byte, u8)
+DEFINE_PCI_CONFIG(write, word, u16)
+DEFINE_PCI_CONFIG(write, dword, u32)
+
+static int conf_space_read(struct pci_dev *dev,
+			   const struct config_field_entry *entry,
+			   int offset, u32 *value)
+{
+	int ret = 0;
+	const struct config_field *field = entry->field;
+
+	*value = 0;
+
+	switch (field->size) {
+	case 1:
+		if (field->u.b.read)
+			ret = field->u.b.read(dev, offset, (u8 *) value,
+					      entry->data);
+		break;
+	case 2:
+		if (field->u.w.read)
+			ret = field->u.w.read(dev, offset, (u16 *) value,
+					      entry->data);
+		break;
+	case 4:
+		if (field->u.dw.read)
+			ret = field->u.dw.read(dev, offset, value, entry->data);
+		break;
+	}
+	return ret;
+}
+
+static int conf_space_write(struct pci_dev *dev,
+			    const struct config_field_entry *entry,
+			    int offset, u32 value)
+{
+	int ret = 0;
+	const struct config_field *field = entry->field;
+
+	switch (field->size) {
+	case 1:
+		if (field->u.b.write)
+			ret = field->u.b.write(dev, offset, (u8) value,
+					       entry->data);
+		break;
+	case 2:
+		if (field->u.w.write)
+			ret = field->u.w.write(dev, offset, (u16) value,
+					       entry->data);
+		break;
+	case 4:
+		if (field->u.dw.write)
+			ret = field->u.dw.write(dev, offset, value,
+						entry->data);
+		break;
+	}
+	return ret;
+}
+
+static inline u32 get_mask(int size)
+{
+	if (size == 1)
+		return 0xff;
+	else if (size == 2)
+		return 0xffff;
+	else
+		return 0xffffffff;
+}
+
+static inline int valid_request(int offset, int size)
+{
+	/* Validate request (no un-aligned requests) */
+	if ((size == 1 || size == 2 || size == 4) && (offset % size) == 0)
+		return 1;
+	return 0;
+}
+
+static inline u32 merge_value(u32 val, u32 new_val, u32 new_val_mask,
+			      int offset)
+{
+	if (offset >= 0) {
+		new_val_mask <<= (offset * 8);
+		new_val <<= (offset * 8);
+	} else {
+		new_val_mask >>= (offset * -8);
+		new_val >>= (offset * -8);
+	}
+	val = (val & ~new_val_mask) | (new_val & new_val_mask);
+
+	return val;
+}
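To see what merge_value() computes, a standalone worked example (the same arithmetic as above, duplicated outside the kernel so it can be compiled and run directly):

    #include <stdint.h>
    #include <stdio.h>

    /* same logic as merge_value() above, duplicated for a standalone demo */
    static uint32_t merge_value(uint32_t val, uint32_t new_val,
    			    uint32_t new_val_mask, int offset)
    {
    	if (offset >= 0) {
    		new_val_mask <<= (offset * 8);
    		new_val <<= (offset * 8);
    	} else {
    		new_val_mask >>= (offset * -8);
    		new_val >>= (offset * -8);
    	}
    	return (val & ~new_val_mask) | (new_val & new_val_mask);
    }

    int main(void)
    {
    	/* a 16-bit write of 0x1122 landing at byte offset 2 of a dword */
    	uint32_t out = merge_value(0xaabbccdd, 0x1122, 0xffff, 2);
    	printf("%08x\n", out);	/* prints 1122ccdd */
    	return 0;
    }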
+
+static int pcibios_err_to_errno(int err)
+{
+	switch (err) {
+	case PCIBIOS_SUCCESSFUL:
+		return XEN_PCI_ERR_success;
+	case PCIBIOS_DEVICE_NOT_FOUND:
+		return XEN_PCI_ERR_dev_not_found;
+	case PCIBIOS_BAD_REGISTER_NUMBER:
+		return XEN_PCI_ERR_invalid_offset;
+	case PCIBIOS_FUNC_NOT_SUPPORTED:
+		return XEN_PCI_ERR_not_implemented;
+	case PCIBIOS_SET_FAILED:
+		return XEN_PCI_ERR_access_denied;
+	}
+	return err;
+}
+
+int pciback_config_read(struct pci_dev *dev, int offset, int size,
+			u32 *ret_val)
+{
+	int err = 0;
+	struct pciback_dev_data *dev_data = pci_get_drvdata(dev);
+	const struct config_field_entry *cfg_entry;
+	const struct config_field *field;
+	int req_start, req_end, field_start, field_end;
+	/* if read fails for any reason, return 0
+	 * (as if device didn't respond) */
+	u32 value = 0, tmp_val;
+
+	if (unlikely(verbose_request))
+		printk(KERN_DEBUG "pciback: %s: read %d bytes at 0x%x\n",
+		       pci_name(dev), size, offset);
+
+	if (!valid_request(offset, size)) {
+		err = XEN_PCI_ERR_invalid_offset;
+		goto out;
+	}
+
+	/* Get the real value first, then modify as appropriate */
+	switch (size) {
+	case 1:
+		err = pci_read_config_byte(dev, offset, (u8 *) &value);
+		break;
+	case 2:
+		err = pci_read_config_word(dev, offset, (u16 *) &value);
+		break;
+	case 4:
+		err = pci_read_config_dword(dev, offset, &value);
+		break;
+	}
+
+	list_for_each_entry(cfg_entry, &dev_data->config_fields, list) {
+		field = cfg_entry->field;
+
+		req_start = offset;
+		req_end = offset + size;
+		field_start = OFFSET(cfg_entry);
+		field_end = OFFSET(cfg_entry) + field->size;
+
+		if ((req_start >= field_start && req_start < field_end)
+		    || (req_end > field_start && req_end <= field_end)) {
+			err = conf_space_read(dev, cfg_entry, field_start,
+					      &tmp_val);
+			if (err)
+				goto out;
+
+			value = merge_value(value, tmp_val,
+					    get_mask(field->size),
+					    field_start - req_start);
+		}
+	}
+
+out:
+	if (unlikely(verbose_request))
+		printk(KERN_DEBUG "pciback: %s: read %d bytes at 0x%x = %x\n",
+		       pci_name(dev), size, offset, value);
+
+	*ret_val = value;
+	return pcibios_err_to_errno(err);
+}
+
+int pciback_config_write(struct pci_dev *dev, int offset, int size, u32 value)
+{
+	int err = 0, handled = 0;
+	struct pciback_dev_data *dev_data = pci_get_drvdata(dev);
+	const struct config_field_entry *cfg_entry;
+	const struct config_field *field;
+	u32 tmp_val;
+	int req_start, req_end, field_start, field_end;
+
+	if (unlikely(verbose_request))
+		printk(KERN_DEBUG
+		       "pciback: %s: write request %d bytes at 0x%x = %x\n",
+		       pci_name(dev), size, offset, value);
+
+	if (!valid_request(offset, size))
+		return XEN_PCI_ERR_invalid_offset;
+
+	list_for_each_entry(cfg_entry, &dev_data->config_fields, list) {
+		field = cfg_entry->field;
+
+		req_start = offset;
+		req_end = offset + size;
+		field_start = OFFSET(cfg_entry);
+		field_end = OFFSET(cfg_entry) + field->size;
+
+		if ((req_start >= field_start && req_start < field_end)
+		    || (req_end > field_start && req_end <= field_end)) {
+			tmp_val = 0;
+
+			err = pciback_config_read(dev, field_start,
+						  field->size, &tmp_val);
+			if (err)
+				break;
+
+			tmp_val = merge_value(tmp_val, value, get_mask(size),
+					      req_start - field_start);
+
+			err = conf_space_write(dev, cfg_entry, field_start,
+					       tmp_val);
+
+			/* handled is set true here, but not every byte
+			 * may have been written! Properly detecting if
+			 * every byte is handled is unnecessary as the
+			 * flag is used to detect devices that need
+			 * special helpers to work correctly.
+			 */
+			handled = 1;
+		}
+	}
+
+	if (!handled && !err) {
+		/* By default, anything not specifically handled above is
+		 * read-only. The permissive flag changes this behavior so
+		 * that anything not specifically handled above is writable.
+		 * This means that some fields may still be read-only because
+		 * they have entries in the config_field list that intercept
+		 * the write and do nothing. */
+		if (dev_data->permissive || permissive) {
+			switch (size) {
+			case 1:
+				err = pci_write_config_byte(dev, offset,
+							    (u8) value);
+				break;
+			case 2:
+				err = pci_write_config_word(dev, offset,
+							    (u16) value);
+				break;
+			case 4:
+				err = pci_write_config_dword(dev, offset,
+							     (u32) value);
+				break;
+			}
+		} else if (!dev_data->warned_on_write) {
+			dev_data->warned_on_write = 1;
+			dev_warn(&dev->dev, "Driver tried to write to a "
+				 "read-only configuration space field at offset"
+				 " 0x%x, size %d. This may be harmless, but if "
+				 "you have problems with your device:\n"
+				 "1) see permissive attribute in sysfs\n"
+				 "2) report problems to the xen-devel "
+				 "mailing list along with details of your "
+				 "device obtained from lspci.\n", offset, size);
+		}
+	}
+
+	return pcibios_err_to_errno(err);
+}
+
+void pciback_config_free_dyn_fields(struct pci_dev *dev)
+{
+	struct pciback_dev_data *dev_data = pci_get_drvdata(dev);
+	struct config_field_entry *cfg_entry, *t;
+	const struct config_field *field;
+
+	dev_dbg(&dev->dev, "free-ing dynamically allocated virtual "
+		"configuration space fields\n");
+	if (!dev_data)
+		return;
+
+	list_for_each_entry_safe(cfg_entry, t, &dev_data->config_fields, list) {
+		field = cfg_entry->field;
+
+		if (field->clean) {
+			field->clean((struct config_field *)field);
+
+			kfree(cfg_entry->data);
+
+			list_del(&cfg_entry->list);
+			kfree(cfg_entry);
+		}
+
+	}
+}
+
+void pciback_config_reset_dev(struct pci_dev *dev)
+{
+	struct pciback_dev_data *dev_data = pci_get_drvdata(dev);
+	const struct config_field_entry *cfg_entry;
+	const struct config_field *field;
+
+	dev_dbg(&dev->dev, "resetting virtual configuration space\n");
+	if (!dev_data)
+		return;
+
+	list_for_each_entry(cfg_entry, &dev_data->config_fields, list) {
+		field = cfg_entry->field;
+
+		if (field->reset)
+			field->reset(dev, OFFSET(cfg_entry), cfg_entry->data);
+	}
+}
+
+void pciback_config_free_dev(struct pci_dev *dev)
+{
+	struct pciback_dev_data *dev_data = pci_get_drvdata(dev);
+	struct config_field_entry *cfg_entry, *t;
+	const struct config_field *field;
+
+	dev_dbg(&dev->dev, "free-ing virtual configuration space fields\n");
+	if (!dev_data)
+		return;
+
+	list_for_each_entry_safe(cfg_entry, t, &dev_data->config_fields, list) {
+		list_del(&cfg_entry->list);
+
+		field = cfg_entry->field;
+
+		if (field->release)
+			field->release(dev, OFFSET(cfg_entry), cfg_entry->data);
+
+		kfree(cfg_entry);
+	}
+}
+
+int pciback_config_add_field_offset(struct pci_dev *dev,
+				    const struct config_field *field,
+				    unsigned int base_offset)
+{
+	int err = 0;
+	struct pciback_dev_data *dev_data = pci_get_drvdata(dev);
+	struct config_field_entry *cfg_entry;
+	void *tmp;
+
+	cfg_entry = kmalloc(sizeof(*cfg_entry), GFP_KERNEL);
+	if (!cfg_entry) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	cfg_entry->data = NULL;
+	cfg_entry->field = field;
+	cfg_entry->base_offset = base_offset;
+
+	/* silently ignore duplicate fields */
+	err = pciback_field_is_dup(dev, OFFSET(cfg_entry));
+	if (err)
+		goto out;
+
+	if (field->init) {
+		tmp = field->init(dev, OFFSET(cfg_entry));
+
+		if (IS_ERR(tmp)) {
+			err = PTR_ERR(tmp);
+			goto out;
+		}
+
+		cfg_entry->data = tmp;
+	}
+
+	dev_dbg(&dev->dev, "added config field at offset 0x%02x\n",
+		OFFSET(cfg_entry));
+	list_add_tail(&cfg_entry->list, &dev_data->config_fields);
+
+out:
+	if (err)
+		kfree(cfg_entry);
+
+	return err;
+}
+
+/* This sets up the device's virtual configuration space to keep track of
+ * certain registers (like the base address registers (BARs) so that we can
+ * keep the client from manipulating them directly.
+ */
+int pciback_config_init_dev(struct pci_dev *dev)
+{
+	int err = 0;
+	struct pciback_dev_data *dev_data = pci_get_drvdata(dev);
+
+	dev_dbg(&dev->dev, "initializing virtual configuration space\n");
+
+	INIT_LIST_HEAD(&dev_data->config_fields);
+
+	err = pciback_config_header_add_fields(dev);
+	if (err)
+		goto out;
+
+	err = pciback_config_capability_add_fields(dev);
+	if (err)
+		goto out;
+
+	err = pciback_config_quirks_init(dev);
+
+out:
+	return err;
+}
+
+int pciback_config_init(void)
+{
+	return pciback_config_capability_init();
+}
diff --git a/drivers/xen/pciback/conf_space.h b/drivers/xen/pciback/conf_space.h
new file mode 100644
index 0000000..50ebef2
--- /dev/null
+++ b/drivers/xen/pciback/conf_space.h
@@ -0,0 +1,126 @@
+/*
+ * PCI Backend - Common data structures for overriding the configuration space
+ *
+ * Author: Ryan Wilson <hap9@epoch.ncsc.mil>
+ */
+
+#ifndef __XEN_PCIBACK_CONF_SPACE_H__
+#define __XEN_PCIBACK_CONF_SPACE_H__
+
+#include <linux/list.h>
+#include <linux/err.h>
+
+/* conf_field_init can return an errno in a ptr with ERR_PTR() */
+typedef void *(*conf_field_init) (struct pci_dev *dev, int offset);
+typedef void (*conf_field_reset) (struct pci_dev *dev, int offset, void *data);
+typedef void (*conf_field_free) (struct pci_dev *dev, int offset, void *data);
+
+typedef int (*conf_dword_write) (struct pci_dev *dev, int offset, u32 value,
+				 void *data);
+typedef int (*conf_word_write) (struct pci_dev *dev, int offset, u16 value,
+				void *data);
+typedef int (*conf_byte_write) (struct pci_dev *dev, int offset, u8 value,
+				void *data);
+typedef int (*conf_dword_read) (struct pci_dev *dev, int offset, u32 *value,
+				void *data);
+typedef int (*conf_word_read) (struct pci_dev *dev, int offset, u16 *value,
+			       void *data);
+typedef int (*conf_byte_read) (struct pci_dev *dev, int offset, u8 *value,
+			       void *data);
+
+/* These are the fields within the configuration space which we
+ * are interested in intercepting reads/writes to and changing their
+ * values.
+ */
+struct config_field {
+	unsigned int offset;
+	unsigned int size;
+	unsigned int mask;
+	conf_field_init init;
+	conf_field_reset reset;
+	conf_field_free release;
+	void (*clean) (struct config_field *field);
+	union {
+		struct {
+			conf_dword_write write;
+			conf_dword_read read;
+		} dw;
+		struct {
+			conf_word_write write;
+			conf_word_read read;
+		} w;
+		struct {
+			conf_byte_write write;
+			conf_byte_read read;
+		} b;
+	} u;
+	struct list_head list;
+};
+
+struct config_field_entry {
+	struct list_head list;
+	const struct config_field *field;
+	unsigned int base_offset;
+	void *data;
+};
+
+#define OFFSET(cfg_entry) ((cfg_entry)->base_offset+(cfg_entry)->field->offset)
+
+/* Add fields to a device - the add_fields macro expects to get a pointer to
+ * the first entry in an array (of which the ending is marked by size==0)
+ */
+int pciback_config_add_field_offset(struct pci_dev *dev,
+				    const struct config_field *field,
+				    unsigned int offset);
+
+static inline int pciback_config_add_field(struct pci_dev *dev,
+					   const struct config_field *field)
+{
+	return pciback_config_add_field_offset(dev, field, 0);
+}
+
+static inline int pciback_config_add_fields(struct pci_dev *dev,
+					    const struct config_field *field)
+{
+	int i, err = 0;
+	for (i = 0; field[i].size != 0; i++) {
+		err = pciback_config_add_field(dev, &field[i]);
+		if (err)
+			break;
+	}
+	return err;
+}
+
+static inline int pciback_config_add_fields_offset(struct pci_dev *dev,
+						   const struct config_field *field,
+						   unsigned int offset)
+{
+	int i, err = 0;
+	for (i = 0; field[i].size != 0; i++) {
+		err = pciback_config_add_field_offset(dev, &field[i], offset);
+		if (err)
+			break;
+	}
+	return err;
+}
+
+/* Read/Write the real configuration space */
+int pciback_read_config_byte(struct pci_dev *dev, int offset, u8 *value,
+			     void *data);
+int pciback_read_config_word(struct pci_dev *dev, int offset, u16 *value,
+			     void *data);
+int pciback_read_config_dword(struct pci_dev *dev, int offset, u32 *value,
+			      void *data);
+int pciback_write_config_byte(struct pci_dev *dev, int offset, u8 value,
+			      void *data);
+int pciback_write_config_word(struct pci_dev *dev, int offset, u16 value,
+			      void *data);
+int pciback_write_config_dword(struct pci_dev *dev, int offset, u32 value,
+			       void *data);
+
+int pciback_config_capability_init(void);
+
+int pciback_config_header_add_fields(struct pci_dev *dev);
+int pciback_config_capability_add_fields(struct pci_dev *dev);
+
+#endif /* __XEN_PCIBACK_CONF_SPACE_H__ */
diff --git a/drivers/xen/pciback/conf_space_capability.c b/drivers/xen/pciback/conf_space_capability.c
new file mode 100644
index 0000000..0ea84d6
--- /dev/null
+++ b/drivers/xen/pciback/conf_space_capability.c
@@ -0,0 +1,66 @@
+/*
+ * PCI Backend - Handles the virtual fields found on the capability lists
+ * in the configuration space.
+ *
+ * Author: Ryan Wilson <hap9@epoch.ncsc.mil>
+ */
+
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include "pciback.h"
+#include "conf_space.h"
+#include "conf_space_capability.h"
+
+static LIST_HEAD(capabilities);
+
+static const struct config_field caplist_header[] = {
+	{
+	 .offset    = PCI_CAP_LIST_ID,
+	 .size      = 2, /* encompass PCI_CAP_LIST_ID & PCI_CAP_LIST_NEXT */
+	 .u.w.read  = pciback_read_config_word,
+	 .u.w.write = NULL,
+	},
+	{}
+};
+
+static inline void register_capability(struct pciback_config_capability *cap)
+{
+	list_add_tail(&cap->cap_list, &capabilities);
+}
+
+int pciback_config_capability_add_fields(struct pci_dev *dev)
+{
+	int err = 0;
+	struct pciback_config_capability *cap;
+	int cap_offset;
+
+	list_for_each_entry(cap, &capabilities, cap_list) {
+		cap_offset = pci_find_capability(dev, cap->capability);
+		if (cap_offset) {
+			dev_dbg(&dev->dev, "Found capability 0x%x at 0x%x\n",
+				cap->capability, cap_offset);
+
+			err = pciback_config_add_fields_offset(dev,
+							       caplist_header,
+							       cap_offset);
+			if (err)
+				goto out;
+			err = pciback_config_add_fields_offset(dev,
+							       cap->fields,
+							       cap_offset);
+			if (err)
+				goto out;
+		}
+	}
+
+out:
+	return err;
+}
+
+int pciback_config_capability_init(void)
+{
+	register_capability(&pciback_config_capability_vpd);
+	register_capability(&pciback_config_capability_pm);
+
+	return 0;
+}
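Adding an overlay for a further capability follows the same shape as the VPD and PM registrations above; a minimal sketch for a hypothetical extra capability (the PCI_CAP_ID_AGP choice and the field offset are illustrative only):

    static const struct config_field caplist_agp[] = {
    	{
    	 .offset    = 4,	/* capability-relative offset */
    	 .size      = 2,
    	 .u.w.read  = pciback_read_config_word,
    	 .u.w.write = NULL,	/* read-only for the guest */
    	},
    	{}
    };

    struct pciback_config_capability pciback_config_capability_agp = {
    	.capability = PCI_CAP_ID_AGP,
    	.fields = caplist_agp,
    };

    /* then, from pciback_config_capability_init():
     *	register_capability(&pciback_config_capability_agp);
     */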
diff --git a/drivers/xen/pciback/conf_space_capability.h b/drivers/xen/pciback/conf_space_capability.h
new file mode 100644
index 0000000..8da3ac4
--- /dev/null
+++ b/drivers/xen/pciback/conf_space_capability.h
@@ -0,0 +1,26 @@
+/*
+ * PCI Backend - Data structures for special overlays for structures on
+ * the capability list.
+ *
+ * Author: Ryan Wilson <hap9@epoch.ncsc.mil>
+ */
+
+#ifndef __PCIBACK_CONFIG_CAPABILITY_H__
+#define __PCIBACK_CONFIG_CAPABILITY_H__
+
+#include <linux/pci.h>
+#include <linux/list.h>
+
+struct pciback_config_capability {
+	struct list_head cap_list;
+
+	int capability;
+
+	/* If the device has the capability found above, add these fields */
+	const struct config_field *fields;
+};
+
+extern struct pciback_config_capability pciback_config_capability_vpd;
+extern struct pciback_config_capability pciback_config_capability_pm;
+
+#endif
diff --git a/drivers/xen/pciback/conf_space_capability_msi.c b/drivers/xen/pciback/conf_space_capability_msi.c
new file mode 100644
index 0000000..041e4aa
--- /dev/null
+++ b/drivers/xen/pciback/conf_space_capability_msi.c
@@ -0,0 +1,136 @@
+/*
+ * PCI Backend -- Configuration overlay for MSI capability
+ */
+#include <linux/pci.h>
+#include <linux/slab.h>
+#include "conf_space.h"
+#include "conf_space_capability.h"
+#include <xen/interface/io/pciif.h>
+#include <xen/events.h>
+#include "pciback.h"
+
+int pciback_enable_msi(struct pciback_device *pdev,
+		       struct pci_dev *dev, struct xen_pci_op *op)
+{
+	struct pciback_dev_data *dev_data;
+	int otherend = pdev->xdev->otherend_id;
+	int status;
+
+	if (unlikely(verbose_request))
+		printk(KERN_DEBUG "pciback: %s: enable MSI\n", pci_name(dev));
+
+	status = pci_enable_msi(dev);
+
+	if (status) {
+		printk(KERN_ERR "error enable msi for guest %x status %x\n",
+		       otherend, status);
+		op->value = 0;
+		return XEN_PCI_ERR_op_failed;
+	}
+
+	/* The value the guest needs is actually the IDT vector, not
+	 * the local domain's IRQ number. */
+
+	op->value = dev->irq ? xen_pirq_from_irq(dev->irq) : 0;
+	if (unlikely(verbose_request))
+		printk(KERN_DEBUG "pciback: %s: MSI: %d\n", pci_name(dev),
+		       op->value);
+
+	dev_data = pci_get_drvdata(dev);
+	if (dev_data)
+		dev_data->ack_intr = 0;
+
+	return 0;
+}
+
+int pciback_disable_msi(struct pciback_device *pdev,
+			struct pci_dev *dev, struct xen_pci_op *op)
+{
+	struct pciback_dev_data *dev_data;
+
+	if (unlikely(verbose_request))
+		printk(KERN_DEBUG "pciback: %s: disable MSI\n", pci_name(dev));
+	pci_disable_msi(dev);
+
+	op->value = dev->irq ? xen_pirq_from_irq(dev->irq) : 0;
+	if (unlikely(verbose_request))
+		printk(KERN_DEBUG "pciback: %s: MSI: %d\n", pci_name(dev),
+		       op->value);
+	dev_data = pci_get_drvdata(dev);
+	if (dev_data)
+		dev_data->ack_intr = 1;
+	return 0;
+}
+
+int pciback_enable_msix(struct pciback_device *pdev,
+			struct pci_dev *dev, struct xen_pci_op *op)
+{
+	struct pciback_dev_data *dev_data;
+	int i, result;
+	struct msix_entry *entries;
+
+	if (unlikely(verbose_request))
+		printk(KERN_DEBUG "pciback: %s: enable MSI-X\n", pci_name(dev));
+	if (op->value > SH_INFO_MAX_VEC)
+		return -EINVAL;
+
+	entries = kmalloc(op->value * sizeof(*entries), GFP_KERNEL);
+	if (entries == NULL)
+		return -ENOMEM;
+
+	for (i = 0; i < op->value; i++) {
+		entries[i].entry = op->msix_entries[i].entry;
+		entries[i].vector = op->msix_entries[i].vector;
+	}
+
+	result = pci_enable_msix(dev, entries, op->value);
+
+	if (result == 0) {
+		for (i = 0; i < op->value; i++) {
+			op->msix_entries[i].entry = entries[i].entry;
+			if (entries[i].vector)
+				op->msix_entries[i].vector =
+					xen_pirq_from_irq(entries[i].vector);
+			if (unlikely(verbose_request))
+				printk(KERN_DEBUG "pciback: %s: " \
+					"MSI-X[%d]: %d\n",
+					pci_name(dev), i,
+					op->msix_entries[i].vector);
+		}
+	} else {
+		printk(KERN_WARNING "pciback: %s: failed to enable MSI-X: err %d!\n",
+		       pci_name(dev), result);
+	}
+	kfree(entries);
+
+	op->value = result;
+	dev_data = pci_get_drvdata(dev);
+	if (dev_data)
+		dev_data->ack_intr = 0;
+
+	return result;
+}
+
+int pciback_disable_msix(struct pciback_device *pdev,
+			 struct pci_dev *dev, struct xen_pci_op *op)
+{
+	struct pciback_dev_data *dev_data;
+	if (unlikely(verbose_request))
+		printk(KERN_DEBUG "pciback: %s: disable MSI-X\n",
+		       pci_name(dev));
+	pci_disable_msix(dev);
+
+	/*
+	 * SR-IOV devices (which don't have any legacy IRQ) have
+	 * an undefined IRQ value of zero.
+	 */
+	op->value = dev->irq ? xen_pirq_from_irq(dev->irq) : 0;
+	if (unlikely(verbose_request))
+		printk(KERN_DEBUG "pciback: %s: MSI-X: %d\n", pci_name(dev),
+		       op->value);
+	dev_data = pci_get_drvdata(dev);
+	if (dev_data)
+		dev_data->ack_intr = 1;
+	return 0;
+}
+
diff --git a/drivers/xen/pciback/conf_space_capability_pm.c b/drivers/xen/pciback/conf_space_capability_pm.c
new file mode 100644
index 0000000..0442616
--- /dev/null
+++ b/drivers/xen/pciback/conf_space_capability_pm.c
@@ -0,0 +1,113 @@
+/*
+ * PCI Backend - Configuration space overlay for power management
+ *
+ * Author: Ryan Wilson <hap9@epoch.ncsc.mil>
+ */
+
+#include <linux/pci.h>
+#include "conf_space.h"
+#include "conf_space_capability.h"
+
+static int pm_caps_read(struct pci_dev *dev, int offset, u16 *value,
+			void *data)
+{
+	int err;
+	u16 real_value;
+
+	err = pci_read_config_word(dev, offset, &real_value);
+	if (err)
+		goto out;
+
+	*value = real_value & ~PCI_PM_CAP_PME_MASK;
+
+out:
+	return err;
+}
+
+/* PM_OK_BITS specifies the bits that the driver domain is allowed to change.
+ * Can't allow driver domain to enable PMEs - they're shared */
+#define PM_OK_BITS (PCI_PM_CTRL_PME_STATUS|PCI_PM_CTRL_DATA_SEL_MASK)
+
+static int pm_ctrl_write(struct pci_dev *dev, int offset, u16 new_value,
+			 void *data)
+{
+	int err;
+	u16 old_value;
+	pci_power_t new_state, old_state;
+
+	err = pci_read_config_word(dev, offset, &old_value);
+	if (err)
+		goto out;
+
+	old_state = (pci_power_t)(old_value & PCI_PM_CTRL_STATE_MASK);
+	new_state = (pci_power_t)(new_value & PCI_PM_CTRL_STATE_MASK);
+
+	new_value &= PM_OK_BITS;
+	if ((old_value & PM_OK_BITS) != new_value) {
+		new_value = (old_value & ~PM_OK_BITS) | new_value;
+		err = pci_write_config_word(dev, offset, new_value);
+		if (err)
+			goto out;
+	}
+
+	/* Let pci core handle the power management change */
+	dev_dbg(&dev->dev, "set power state to %x\n", new_state);
+	err = pci_set_power_state(dev, new_state);
+	if (err) {
+		err = PCIBIOS_SET_FAILED;
+		goto out;
+	}
+
+out:
+	return err;
+}
+
+/* Ensure PMEs are disabled */
+static void *pm_ctrl_init(struct pci_dev *dev, int offset)
+{
+	int err;
+	u16 value;
+
+	err = pci_read_config_word(dev, offset, &value);
+	if (err)
+		goto out;
+
+	if (value & PCI_PM_CTRL_PME_ENABLE) {
+		value &= ~PCI_PM_CTRL_PME_ENABLE;
+		err = pci_write_config_word(dev, offset, value);
+	}
+
+out:
+	return ERR_PTR(err);
+}
+
+static const struct config_field caplist_pm[] = {
+	{
+	 .offset     = PCI_PM_PMC,
+	 .size       = 2,
+	 .u.w.read   = pm_caps_read,
+	},
+	{
+	 .offset     = PCI_PM_CTRL,
+	 .size       = 2,
+	 .init       = pm_ctrl_init,
+	 .u.w.read   = pciback_read_config_word,
+	 .u.w.write  = pm_ctrl_write,
+	},
+	{
+	 .offset     = PCI_PM_PPB_EXTENSIONS,
+	 .size       = 1,
+	 .u.b.read   = pciback_read_config_byte,
+	},
+	{
+	 .offset     = PCI_PM_DATA_REGISTER,
+	 .size       = 1,
+	 .u.b.read   = pciback_read_config_byte,
+	},
+	{}
+};
+
+struct pciback_config_capability pciback_config_capability_pm = {
+	.capability = PCI_CAP_ID_PM,
+	.fields = caplist_pm,
+};
diff --git a/drivers/xen/pciback/conf_space_capability_vpd.c b/drivers/xen/pciback/conf_space_capability_vpd.c
new file mode 100644
index 0000000..e7b4d66
--- /dev/null
+++ b/drivers/xen/pciback/conf_space_capability_vpd.c
@@ -0,0 +1,40 @@
+/*
+ * PCI Backend - Configuration space overlay for Vital Product Data
+ *
+ * Author: Ryan Wilson <hap9@epoch.ncsc.mil>
+ */
+
+#include <linux/pci.h>
+#include "conf_space.h"
+#include "conf_space_capability.h"
+
+static int vpd_address_write(struct pci_dev *dev, int offset, u16 value,
+			     void *data)
+{
+	/* Disallow writes to the vital product data */
+	if (value & PCI_VPD_ADDR_F)
+		return PCIBIOS_SET_FAILED;
+	else
+		return pci_write_config_word(dev, offset, value);
+}
+
+static const struct config_field caplist_vpd[] = {
+	{
+	 .offset    = PCI_VPD_ADDR,
+	 .size      = 2,
+	 .u.w.read  = pciback_read_config_word,
+	 .u.w.write = vpd_address_write,
+	},
+	{
+	 .offset     = PCI_VPD_DATA,
+	 .size       = 4,
+	 .u.dw.read  = pciback_read_config_dword,
+	 .u.dw.write = NULL,
+	},
+	{}
+};
+
+struct pciback_config_capability pciback_config_capability_vpd = {
+	.capability = PCI_CAP_ID_VPD,
+	.fields = caplist_vpd,
+};
diff --git a/drivers/xen/pciback/conf_space_header.c b/drivers/xen/pciback/conf_space_header.c
new file mode 100644
index 0000000..22ad0f5
--- /dev/null
+++ b/drivers/xen/pciback/conf_space_header.c
@@ -0,0 +1,385 @@
+/*
+ * PCI Backend - Handles the virtual fields in the configuration space headers.
+ *
+ * Author: Ryan Wilson <hap9@epoch.ncsc.mil>
+ */
+
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include "pciback.h"
+#include "conf_space.h"
+
+struct pci_bar_info {
+	u32 val;
+	u32 len_val;
+	int which;
+};
+
+#define is_enable_cmd(value) ((value)&(PCI_COMMAND_MEMORY|PCI_COMMAND_IO))
+#define is_master_cmd(value) ((value)&PCI_COMMAND_MASTER)
+
+static int command_read(struct pci_dev *dev, int offset, u16 *value, void *data)
+{
+	int i;
+	int ret;
+
+	ret = pciback_read_config_word(dev, offset, value, data);
+	if (!atomic_read(&dev->enable_cnt))
+		return ret;
+
+	for (i = 0; i < PCI_ROM_RESOURCE; i++) {
+		if (dev->resource[i].flags & IORESOURCE_IO)
+			*value |= PCI_COMMAND_IO;
+		if (dev->resource[i].flags & IORESOURCE_MEM)
+			*value |= PCI_COMMAND_MEMORY;
+	}
+
+	return ret;
+}
+
+static int command_write(struct pci_dev *dev, int offset, u16 value, void *data)
+{
+	struct pciback_dev_data *dev_data;
+	int err;
+
+	dev_data = pci_get_drvdata(dev);
+	if (!pci_is_enabled(dev) && is_enable_cmd(value)) {
+		if (unlikely(verbose_request))
+			printk(KERN_DEBUG "pciback: %s: enable\n",
+			       pci_name(dev));
+		err = pci_enable_device(dev);
+		if (err)
+			return err;
+		if (dev_data)
+			dev_data->enable_intx = 1;
+	} else if (pci_is_enabled(dev) && !is_enable_cmd(value)) {
+		if (unlikely(verbose_request))
+			printk(KERN_DEBUG "pciback: %s: disable\n",
+			       pci_name(dev));
+		pci_disable_device(dev);
+		if (dev_data)
+			dev_data->enable_intx = 0;
+	}
+
+	if (!dev->is_busmaster && is_master_cmd(value)) {
+		if (unlikely(verbose_request))
+			printk(KERN_DEBUG "pciback: %s: set bus master\n",
+			       pci_name(dev));
+		pci_set_master(dev);
+	}
+
+	if (value & PCI_COMMAND_INVALIDATE) {
+		if (unlikely(verbose_request))
+			printk(KERN_DEBUG
+			       "pciback: %s: enable memory-write-invalidate\n",
+			       pci_name(dev));
+		err = pci_set_mwi(dev);
+		if (err) {
+			printk(KERN_WARNING
+			       "pciback: %s: cannot enable "
+			       "memory-write-invalidate (%d)\n",
+			       pci_name(dev), err);
+			value &= ~PCI_COMMAND_INVALIDATE;
+		}
+	}
+
+	return pci_write_config_word(dev, offset, value);
+}
+
+static int rom_write(struct pci_dev *dev, int offset, u32 value, void *data)
+{
+	struct pci_bar_info *bar = data;
+
+	if (unlikely(!bar)) {
+		printk(KERN_WARNING "pciback: driver data not found for %s\n",
+		       pci_name(dev));
+		return XEN_PCI_ERR_op_failed;
+	}
+
+	/* A write to obtain the length must happen as a 32-bit write.
+	 * This does not (yet) support writing individual bytes
+	 */
+	if (value == ~PCI_ROM_ADDRESS_ENABLE)
+		bar->which = 1;
+	else {
+		u32 tmpval;
+		pci_read_config_dword(dev, offset, &tmpval);
+		if (tmpval != bar->val && value == bar->val) {
+			/* Allow restoration of bar value. */
+			pci_write_config_dword(dev, offset, bar->val);
+		}
+		bar->which = 0;
+	}
+
+	/* Do we need to support enabling/disabling the rom address here? */
+
+	return 0;
+}
+
+/* For the BARs, only allow writes which write ~0 or
+ * the correct resource information
+ * (Needed for when the driver probes the resource usage)
+ */
+static int bar_write(struct pci_dev *dev, int offset, u32 value, void *data)
+{
+	struct pci_bar_info *bar = data;
+
+	if (unlikely(!bar)) {
+		printk(KERN_WARNING "pciback: driver data not found for %s\n",
+		       pci_name(dev));
+		return XEN_PCI_ERR_op_failed;
+	}
+
+	/* A write to obtain the length must happen as a 32-bit write.
+	 * This does not (yet) support writing individual bytes
+	 */
+	if (value == ~0)
+		bar->which = 1;
+	else {
+		u32 tmpval;
+		pci_read_config_dword(dev, offset, &tmpval);
+		if (tmpval != bar->val && value == bar->val) {
+			/* Allow restoration of bar value. */
+			pci_write_config_dword(dev, offset, bar->val);
+		}
+		bar->which = 0;
+	}
+
+	return 0;
+}
+
+static int bar_read(struct pci_dev *dev, int offset, u32 *value, void *data)
+{
+	struct pci_bar_info *bar = data;
+
+	if (unlikely(!bar)) {
+		printk(KERN_WARNING "pciback: driver data not found for %s\n",
+		       pci_name(dev));
+		return XEN_PCI_ERR_op_failed;
+	}
+
+	*value = bar->which ? bar->len_val : bar->val;
+
+	return 0;
+}
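The bar->which toggle above serves the standard PCI BAR sizing handshake: the guest driver writes all-ones to the BAR, reads back a mask, then restores the original value. A standalone sketch of the size arithmetic the guest performs after the read-back (assuming a 32-bit memory BAR; illustrative, not part of this patch):

    #include <stdint.h>

    /* Decode the value read back after writing 0xffffffff to a memory BAR.
     * The low flag bits are masked off and the size falls out of the
     * two's complement of the remaining address mask. */
    static uint32_t mem_bar_size(uint32_t readback)
    {
    	return ~(readback & 0xfffffff0u) + 1;	/* e.g. 0xfffff000 -> 4096 */
    }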
+
+static inline void read_dev_bar(struct pci_dev *dev,
+				struct pci_bar_info *bar_info, int offset,
+				u32 len_mask)
+{
+	int pos;
+	struct resource *res = dev->resource;
+
+	if (offset == PCI_ROM_ADDRESS || offset == PCI_ROM_ADDRESS1)
+		pos = PCI_ROM_RESOURCE;
+	else {
+		pos = (offset - PCI_BASE_ADDRESS_0) / 4;
+		if (pos && ((res[pos - 1].flags & (PCI_BASE_ADDRESS_SPACE |
+				PCI_BASE_ADDRESS_MEM_TYPE_MASK)) ==
+			   (PCI_BASE_ADDRESS_SPACE_MEMORY |
+				PCI_BASE_ADDRESS_MEM_TYPE_64))) {
+			bar_info->val = res[pos - 1].start >> 32;
+			bar_info->len_val = res[pos - 1].end >> 32;
+			return;
+		}
+	}
+
+	bar_info->val = res[pos].start |
+			(res[pos].flags & PCI_REGION_FLAG_MASK);
+	bar_info->len_val = res[pos].end - res[pos].start + 1;
+}
+
+static void *bar_init(struct pci_dev *dev, int offset)
+{
+	struct pci_bar_info *bar = kmalloc(sizeof(*bar), GFP_KERNEL);
+
+	if (!bar)
+		return ERR_PTR(-ENOMEM);
+
+	read_dev_bar(dev, bar, offset, ~0);
+	bar->which = 0;
+
+	return bar;
+}
+
+static void *rom_init(struct pci_dev *dev, int offset)
+{
+	struct pci_bar_info *bar = kmalloc(sizeof(*bar), GFP_KERNEL);
+
+	if (!bar)
+		return ERR_PTR(-ENOMEM);
+
+	read_dev_bar(dev, bar, offset, ~PCI_ROM_ADDRESS_ENABLE);
+	bar->which = 0;
+
+	return bar;
+}
+
+static void bar_reset(struct pci_dev *dev, int offset, void *data)
+{
+	struct pci_bar_info *bar = data;
+
+	bar->which = 0;
+}
+
+static void bar_release(struct pci_dev *dev, int offset, void *data)
+{
+	kfree(data);
+}
+
+static int pciback_read_vendor(struct pci_dev *dev, int offset,
+			       u16 *value, void *data)
+{
+	*value = dev->vendor;
+
+	return 0;
+}
+
+static int pciback_read_device(struct pci_dev *dev, int offset,
+			       u16 *value, void *data)
+{
+	*value = dev->device;
+
+	return 0;
+}
+
+static int interrupt_read(struct pci_dev *dev, int offset, u8 *value,
+			  void *data)
+{
+	*value = (u8) dev->irq;
+
+	return 0;
+}
+
+static int bist_write(struct pci_dev *dev, int offset, u8 value, void *data)
+{
+	u8 cur_value;
+	int err;
+
+	err = pci_read_config_byte(dev, offset, &cur_value);
+	if (err)
+		goto out;
+
+	if ((cur_value & ~PCI_BIST_START) == (value & ~PCI_BIST_START)
+	    || value == PCI_BIST_START)
+		err = pci_write_config_byte(dev, offset, value);
+
+out:
+	return err;
+}
+
+static const struct config_field header_common[] = {
+	{
+	 .offset    = PCI_VENDOR_ID,
+	 .size      = 2,
+	 .u.w.read  = pciback_read_vendor,
+	},
+	{
+	 .offset    = PCI_DEVICE_ID,
+	 .size      = 2,
+	 .u.w.read  = pciback_read_device,
+	},
+	{
+	 .offset    = PCI_COMMAND,
+	 .size      = 2,
+	 .u.w.read  = command_read,
+	 .u.w.write = command_write,
+	},
+	{
+	 .offset    = PCI_INTERRUPT_LINE,
+	 .size      = 1,
+	 .u.b.read  = interrupt_read,
+	},
+	{
+	 .offset    = PCI_INTERRUPT_PIN,
+	 .size      = 1,
+	 .u.b.read  = pciback_read_config_byte,
+	},
+	{
+	 /* Any side effects of letting driver domain control cache line? */
+	 .offset    = PCI_CACHE_LINE_SIZE,
+	 .size      = 1,
+	 .u.b.read  = pciback_read_config_byte,
+	 .u.b.write = pciback_write_config_byte,
+	},
+	{
+	 .offset    = PCI_LATENCY_TIMER,
+	 .size      = 1,
+	 .u.b.read  = pciback_read_config_byte,
+	},
+	{
+	 .offset    = PCI_BIST,
+	 .size      = 1,
+	 .u.b.read  = pciback_read_config_byte,
+	 .u.b.write = bist_write,
+	},
+	{}
+};
+
+#define CFG_FIELD_BAR(reg_offset)			\
+	{						\
+	 .offset     = reg_offset,			\
+	 .size       = 4,				\
+	 .init       = bar_init,			\
+	 .reset      = bar_reset,			\
+	 .release    = bar_release,			\
+	 .u.dw.read  = bar_read,			\
+	 .u.dw.write = bar_write,			\
+	}
+
+#define CFG_FIELD_ROM(reg_offset)			\
+	{						\
+	 .offset     = reg_offset,			\
+	 .size       = 4,				\
+	 .init       = rom_init,			\
+	 .reset      = bar_reset,			\
+	 .release    = bar_release,			\
+	 .u.dw.read  = bar_read,			\
+	 .u.dw.write = rom_write,			\
+	}
+
+static const struct config_field header_0[] = {
+	CFG_FIELD_BAR(PCI_BASE_ADDRESS_0),
+	CFG_FIELD_BAR(PCI_BASE_ADDRESS_1),
+	CFG_FIELD_BAR(PCI_BASE_ADDRESS_2),
+	CFG_FIELD_BAR(PCI_BASE_ADDRESS_3),
+	CFG_FIELD_BAR(PCI_BASE_ADDRESS_4),
+	CFG_FIELD_BAR(PCI_BASE_ADDRESS_5),
+	CFG_FIELD_ROM(PCI_ROM_ADDRESS),
+	{}
+};
+
+static const struct config_field header_1[] = {
+	CFG_FIELD_BAR(PCI_BASE_ADDRESS_0),
+	CFG_FIELD_BAR(PCI_BASE_ADDRESS_1),
+	CFG_FIELD_ROM(PCI_ROM_ADDRESS1),
+	{}
+};
+
+int pciback_config_header_add_fields(struct pci_dev *dev)
+{
+	int err;
+
+	err = pciback_config_add_fields(dev, header_common);
+	if (err)
+		goto out;
+
+	switch (dev->hdr_type) {
+	case PCI_HEADER_TYPE_NORMAL:
+		err = pciback_config_add_fields(dev, header_0);
+		break;
+
+	case PCI_HEADER_TYPE_BRIDGE:
+		err = pciback_config_add_fields(dev, header_1);
+		break;
+
+	default:
+		err = -EINVAL;
+		printk(KERN_ERR "pciback: %s: Unsupported header type %d!\n",
+		       pci_name(dev), dev->hdr_type);
+		break;
+	}
+
+out:
+	return err;
+}
diff --git a/drivers/xen/pciback/conf_space_quirks.c b/drivers/xen/pciback/conf_space_quirks.c
new file mode 100644
index 0000000..45c31fb
--- /dev/null
+++ b/drivers/xen/pciback/conf_space_quirks.c
@@ -0,0 +1,140 @@
+/*
+ * PCI Backend - Handle special overlays for broken devices.
+ *
+ * Author: Ryan Wilson <hap9@epoch.ncsc.mil>
+ * Author: Chris Bookholt <hap10@epoch.ncsc.mil>
+ */
+
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include "pciback.h"
+#include "conf_space.h"
+#include "conf_space_quirks.h"
+
+LIST_HEAD(pciback_quirks);
+
+static inline const struct pci_device_id *
+match_one_device(const struct pci_device_id *id, const struct pci_dev *dev)
+{
+	if ((id->vendor == PCI_ANY_ID || id->vendor == dev->vendor) &&
+	    (id->device == PCI_ANY_ID || id->device == dev->device) &&
+	    (id->subvendor == PCI_ANY_ID ||
+	     id->subvendor == dev->subsystem_vendor) &&
+	    (id->subdevice == PCI_ANY_ID ||
+	     id->subdevice == dev->subsystem_device) &&
+	    !((id->class ^ dev->class) & id->class_mask))
+		return id;
+	return NULL;
+}
+
+struct pciback_config_quirk *pciback_find_quirk(struct pci_dev *dev)
+{
+	struct pciback_config_quirk *tmp_quirk;
+
+	list_for_each_entry(tmp_quirk, &pciback_quirks, quirks_list)
+		if (match_one_device(&tmp_quirk->devid, dev) != NULL)
+			goto out;
+	tmp_quirk = NULL;
+	printk(KERN_DEBUG
+	       "quirk didn't match any device pciback knows about\n");
+out:
+	return tmp_quirk;
+}
+
+static inline void register_quirk(struct pciback_config_quirk *quirk)
+{
+	list_add_tail(&quirk->quirks_list, &pciback_quirks);
+}
+
+int pciback_field_is_dup(struct pci_dev *dev, unsigned int reg)
+{
+	int ret = 0;
+	struct pciback_dev_data *dev_data = pci_get_drvdata(dev);
+	struct config_field_entry *cfg_entry;
+
+	list_for_each_entry(cfg_entry, &dev_data->config_fields, list) {
+		if (OFFSET(cfg_entry) == reg) {
+			ret = 1;
+			break;
+		}
+	}
+	return ret;
+}
+
+int pciback_config_quirks_add_field(struct pci_dev *dev, struct config_field
+				    *field)
+{
+	int err = 0;
+
+	switch (field->size) {
+	case 1:
+		field->u.b.read = pciback_read_config_byte;
+		field->u.b.write = pciback_write_config_byte;
+		break;
+	case 2:
+		field->u.w.read = pciback_read_config_word;
+		field->u.w.write = pciback_write_config_word;
+		break;
+	case 4:
+		field->u.dw.read = pciback_read_config_dword;
+		field->u.dw.write = pciback_write_config_dword;
+		break;
+	default:
+		err = -EINVAL;
+		goto out;
+	}
+
+	pciback_config_add_field(dev, field);
+
+out:
+	return err;
+}
+
+int pciback_config_quirks_init(struct pci_dev *dev)
+{
+	struct pciback_config_quirk *quirk;
+	int ret = 0;
+
+	quirk = kzalloc(sizeof(*quirk), GFP_ATOMIC);
+	if (!quirk) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	quirk->devid.vendor = dev->vendor;
+	quirk->devid.device = dev->device;
+	quirk->devid.subvendor = dev->subsystem_vendor;
+	quirk->devid.subdevice = dev->subsystem_device;
+	quirk->devid.class = 0;
+	quirk->devid.class_mask = 0;
+	quirk->devid.driver_data = 0UL;
+
+	quirk->pdev = dev;
+
+	register_quirk(quirk);
+out:
+	return ret;
+}
+
+void pciback_config_field_free(struct config_field *field)
+{
+	kfree(field);
+}
+
+int pciback_config_quirk_release(struct pci_dev *dev)
+{
+	struct pciback_config_quirk *quirk;
+	int ret = 0;
+
+	quirk = pciback_find_quirk(dev);
+	if (!quirk) {
+		ret = -ENXIO;
+		goto out;
+	}
+
+	list_del(&quirk->quirks_list);
+	kfree(quirk);
+
+out:
+	return ret;
+}
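A consumer of pciback_config_quirks_add_field() pairs it with a dynamically allocated field whose clean hook frees it again via pciback_config_field_free(); a minimal sketch (the 0x40 offset and add_quirk_byte() are illustrative, not part of this patch):

    /* Expose one extra byte at config offset 0x40 for a quirky device.
     * pciback_config_quirks_add_field() fills in the u.b read/write
     * helpers based on field->size. */
    static int add_quirk_byte(struct pci_dev *dev)
    {
    	struct config_field *field;

    	field = kzalloc(sizeof(*field), GFP_KERNEL);
    	if (!field)
    		return -ENOMEM;

    	field->offset = 0x40;
    	field->size = 1;
    	field->clean = pciback_config_field_free;

    	return pciback_config_quirks_add_field(dev, field);
    }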
diff --git a/drivers/xen/pciback/conf_space_quirks.h b/drivers/xen/pciback/conf_space_quirks.h
new file mode 100644
index 0000000..acd0e1a
--- /dev/null
+++ b/drivers/xen/pciback/conf_space_quirks.h
@@ -0,0 +1,35 @@
+/*
+ * PCI Backend - Data structures for special overlays for broken devices.
+ *
+ * Ryan Wilson <hap9@epoch.ncsc.mil>
+ * Chris Bookholt <hap10@epoch.ncsc.mil>
+ */
+
+#ifndef __XEN_PCIBACK_CONF_SPACE_QUIRKS_H__
+#define __XEN_PCIBACK_CONF_SPACE_QUIRKS_H__
+
+#include <linux/pci.h>
+#include <linux/list.h>
+
+struct pciback_config_quirk {
+	struct list_head quirks_list;
+	struct pci_device_id devid;
+	struct pci_dev *pdev;
+};
+
+struct pciback_config_quirk *pciback_find_quirk(struct pci_dev *dev);
+
+int pciback_config_quirks_add_field(struct pci_dev *dev, struct config_field
+				    *field);
+
+int pciback_config_quirks_remove_field(struct pci_dev *dev, int reg);
+
+int pciback_config_quirks_init(struct pci_dev *dev);
+
+void pciback_config_field_free(struct config_field *field);
+
+int pciback_config_quirk_release(struct pci_dev *dev);
+
+int pciback_field_is_dup(struct pci_dev *dev, unsigned int reg);
+
+#endif
diff --git a/drivers/xen/pciback/controller.c b/drivers/xen/pciback/controller.c
new file mode 100644
index 0000000..5a7e4cc
--- /dev/null
+++ b/drivers/xen/pciback/controller.c
@@ -0,0 +1,442 @@
+/*
+ * Copyright (C) 2007 Hewlett-Packard Development Company, L.P.
+ *	Alex Williamson <alex.williamson@hp.com>
+ *
+ * PCI "Controller" Backend - virtualize PCI bus topology based on PCI
+ * controllers.  Devices under the same PCI controller are exposed on the
+ * same virtual domain:bus.  Within a bus, device slots are virtualized
+ * to compact the bus.
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ */
+
+#include <linux/acpi.h>
+#include <linux/list.h>
+#include <linux/pci.h>
+#include <linux/spinlock.h>
+#include "pciback.h"
+
+#define PCI_MAX_BUSSES	255
+#define PCI_MAX_SLOTS	32
+
+struct controller_dev_entry {
+	struct list_head list;
+	struct pci_dev *dev;
+	unsigned int devfn;
+};
+
+struct controller_list_entry {
+	struct list_head list;
+	struct pci_controller *controller;
+	unsigned int domain;
+	unsigned int bus;
+	unsigned int next_devfn;
+	struct list_head dev_list;
+};
+
+struct controller_dev_data {
+	struct list_head list;
+	unsigned int next_domain;
+	unsigned int next_bus;
+	spinlock_t lock;
+};
+
+struct walk_info {
+	struct pciback_device *pdev;
+	int resource_count;
+	int root_num;
+};
+
+struct pci_dev *pciback_get_pci_dev(struct pciback_device *pdev,
+				    unsigned int domain, unsigned int bus,
+				    unsigned int devfn)
+{
+	struct controller_dev_data *dev_data = pdev->pci_dev_data;
+	struct controller_dev_entry *dev_entry;
+	struct controller_list_entry *cntrl_entry;
+	struct pci_dev *dev = NULL;
+	unsigned long flags;
+
+	spin_lock_irqsave(&dev_data->lock, flags);
+
+	list_for_each_entry(cntrl_entry, &dev_data->list, list) {
+		if (cntrl_entry->domain != domain ||
+		    cntrl_entry->bus != bus)
+			continue;
+
+		list_for_each_entry(dev_entry, &cntrl_entry->dev_list, list) {
+			if (devfn == dev_entry->devfn) {
+				dev = dev_entry->dev;
+				goto found;
+			}
+		}
+	}
+found:
+	spin_unlock_irqrestore(&dev_data->lock, flags);
+
+	return dev;
+}
+
+int pciback_add_pci_dev(struct pciback_device *pdev, struct pci_dev *dev,
+			int devid, publish_pci_dev_cb publish_cb)
+{
+	struct controller_dev_data *dev_data = pdev->pci_dev_data;
+	struct controller_dev_entry *dev_entry;
+	struct controller_list_entry *cntrl_entry;
+	struct pci_controller *dev_controller = PCI_CONTROLLER(dev);
+	unsigned long flags;
+	int ret = 0, found = 0;
+
+	spin_lock_irqsave(&dev_data->lock, flags);
+
+	/* Look to see if we already have a domain:bus for this controller */
+	list_for_each_entry(cntrl_entry, &dev_data->list, list) {
+		if (cntrl_entry->controller == dev_controller) {
+			found = 1;
+			break;
+		}
+	}
+
+	if (!found) {
+		cntrl_entry = kmalloc(sizeof(*cntrl_entry), GFP_ATOMIC);
+		if (!cntrl_entry) {
+			ret = -ENOMEM;
+			goto out;
+		}
+
+		cntrl_entry->controller = dev_controller;
+		cntrl_entry->next_devfn = PCI_DEVFN(0, 0);
+
+		cntrl_entry->domain = dev_data->next_domain;
+		cntrl_entry->bus = dev_data->next_bus++;
+		if (dev_data->next_bus > PCI_MAX_BUSSES) {
+			dev_data->next_domain++;
+			dev_data->next_bus = 0;
+		}
+
+		INIT_LIST_HEAD(&cntrl_entry->dev_list);
+
+		list_add_tail(&cntrl_entry->list, &dev_data->list);
+	}
+
+	if (PCI_SLOT(cntrl_entry->next_devfn) > PCI_MAX_SLOTS) {
+		/*
+		 * While it seems unlikely, this can actually happen if
+		 * a controller has P2P bridges under it.
+		 */
+		xenbus_dev_fatal(pdev->xdev, -ENOSPC, "Virtual bus %04x:%02x "
+				 "is full, no room to export %04x:%02x:%02x.%x",
+				 cntrl_entry->domain, cntrl_entry->bus,
+				 pci_domain_nr(dev->bus), dev->bus->number,
+				 PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn));
+		ret = -ENOSPC;
+		goto out;
+	}
+
+	dev_entry = kmalloc(sizeof(*dev_entry), GFP_ATOMIC);
+	if (!dev_entry) {
+		if (list_empty(&cntrl_entry->dev_list)) {
+			list_del(&cntrl_entry->list);
+			kfree(cntrl_entry);
+		}
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	dev_entry->dev = dev;
+	dev_entry->devfn = cntrl_entry->next_devfn;
+
+	list_add_tail(&dev_entry->list, &cntrl_entry->dev_list);
+
+	cntrl_entry->next_devfn += PCI_DEVFN(1, 0);
+
+out:
+	spin_unlock_irqrestore(&dev_data->lock, flags);
+
+	/* TODO: Publish virtual domain:bus:slot.func here. */
+
+	return ret;
+}
+
|
|
+void pciback_release_pci_dev(struct pciback_device *pdev, struct pci_dev *dev)
|
|
+{
|
|
+ struct controller_dev_data *dev_data = pdev->pci_dev_data;
|
|
+ struct controller_list_entry *cntrl_entry;
|
|
+ struct controller_dev_entry *dev_entry = NULL;
|
|
+ struct pci_dev *found_dev = NULL;
|
|
+ unsigned long flags;
|
|
+
|
|
+ spin_lock_irqsave(&dev_data->lock, flags);
|
|
+
|
|
+ list_for_each_entry(cntrl_entry, &dev_data->list, list) {
|
|
+ if (cntrl_entry->controller != PCI_CONTROLLER(dev))
|
|
+ continue;
|
|
+
|
|
+ list_for_each_entry(dev_entry, &cntrl_entry->dev_list, list) {
|
|
+ if (dev_entry->dev == dev) {
|
|
+ found_dev = dev_entry->dev;
|
|
+ break;
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+
|
|
+ if (!found_dev) {
|
|
+ spin_unlock_irqrestore(&dev_data->lock, flags);
|
|
+ return;
|
|
+ }
|
|
+
|
|
+ list_del(&dev_entry->list);
|
|
+ kfree(dev_entry);
|
|
+
|
|
+ if (list_empty(&cntrl_entry->dev_list)) {
|
|
+ list_del(&cntrl_entry->list);
|
|
+ kfree(cntrl_entry);
|
|
+ }
|
|
+
|
|
+ spin_unlock_irqrestore(&dev_data->lock, flags);
|
|
+ pcistub_put_pci_dev(found_dev);
|
|
+}
|
|
+
|
|
+int pciback_init_devices(struct pciback_device *pdev)
|
|
+{
|
|
+ struct controller_dev_data *dev_data;
|
|
+
|
|
+ dev_data = kmalloc(sizeof(*dev_data), GFP_KERNEL);
|
|
+ if (!dev_data)
|
|
+ return -ENOMEM;
|
|
+
|
|
+ spin_lock_init(&dev_data->lock);
|
|
+
|
|
+ INIT_LIST_HEAD(&dev_data->list);
|
|
+
|
|
+ /* Starting domain:bus numbers */
|
|
+ dev_data->next_domain = 0;
|
|
+ dev_data->next_bus = 0;
|
|
+
|
|
+ pdev->pci_dev_data = dev_data;
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+static acpi_status write_xenbus_resource(struct acpi_resource *res, void *data)
|
|
+{
|
|
+ struct walk_info *info = data;
|
|
+ struct acpi_resource_address64 addr;
|
|
+ acpi_status status;
|
|
+ int i, len, err;
|
|
+ char str[32], tmp[3];
|
|
+ unsigned char *ptr, *buf;
|
|
+
|
|
+ status = acpi_resource_to_address64(res, &addr);
|
|
+
|
|
+ /* Do we care about this range? Let's check. */
|
|
+ if (!ACPI_SUCCESS(status) ||
|
|
+ !(addr.resource_type == ACPI_MEMORY_RANGE ||
|
|
+ addr.resource_type == ACPI_IO_RANGE) ||
|
|
+ !addr.address_length || addr.producer_consumer != ACPI_PRODUCER)
|
|
+ return AE_OK;
|
|
+
|
|
+ /*
|
|
+ * Furthermore, we really only care to tell the guest about
|
|
+ * address ranges that require address translation of some sort.
|
|
+ */
|
|
+ if (!(addr.resource_type == ACPI_MEMORY_RANGE &&
|
|
+ addr.info.mem.translation) &&
|
|
+ !(addr.resource_type == ACPI_IO_RANGE &&
|
|
+ addr.info.io.translation))
|
|
+ return AE_OK;
|
|
+
|
|
+ /* Store the resource in xenbus for the guest */
|
|
+ len = snprintf(str, sizeof(str), "root-%d-resource-%d",
|
|
+ info->root_num, info->resource_count);
|
|
+ if (unlikely(len >= (sizeof(str) - 1)))
|
|
+ return AE_OK;
|
|
+
|
|
+ buf = kzalloc((sizeof(*res) * 2) + 1, GFP_KERNEL);
|
|
+ if (!buf)
|
|
+ return AE_OK;
|
|
+
|
|
+ /* Clean out resource_source */
|
|
+ res->data.address64.resource_source.index = 0xFF;
|
|
+ res->data.address64.resource_source.string_length = 0;
|
|
+ res->data.address64.resource_source.string_ptr = NULL;
|
|
+
|
|
+ ptr = (unsigned char *)res;
|
|
+
|
|
+ /* Turn the acpi_resource into an ASCII byte stream */
|
|
+ for (i = 0; i < sizeof(*res); i++) {
|
|
+ snprintf(tmp, sizeof(tmp), "%02x", ptr[i]);
|
|
+ strncat(buf, tmp, 2);
|
|
+ }
|
|
+
|
|
+ err = xenbus_printf(XBT_NIL, info->pdev->xdev->nodename,
|
|
+ str, "%s", buf);
|
|
+
|
|
+ if (!err)
|
|
+ info->resource_count++;
|
|
+
|
|
+ kfree(buf);
|
|
+
|
|
+ return AE_OK;
|
|
+}
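+
+/*
+ * A minimal sketch of the encoding performed above (the byte values are
+ * assumed, for illustration only): each byte of the struct acpi_resource
+ * is written as two lowercase hex digits, so a resource starting with
+ * the bytes 0x0a 0x00 0x2c 0x00 ... becomes the xenstore string
+ * "0a002c00...". The buffer size, sizeof(struct acpi_resource) * 2 + 1
+ * (counting the trailing NUL), is the same value published below as
+ * "root-resource-magic", which lets the frontend sanity-check its
+ * decoder against the backend's structure size.
+ */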
+
+int pciback_publish_pci_roots(struct pciback_device *pdev,
+			      publish_pci_root_cb publish_root_cb)
+{
+	struct controller_dev_data *dev_data = pdev->pci_dev_data;
+	struct controller_list_entry *cntrl_entry;
+	int i, root_num, len, err = 0;
+	unsigned int domain, bus;
+	char str[64];
+	struct walk_info info;
+
+	spin_lock(&dev_data->lock);
+
+	list_for_each_entry(cntrl_entry, &dev_data->list, list) {
+		/* First publish all the domain:bus info */
+		err = publish_root_cb(pdev, cntrl_entry->domain,
+				      cntrl_entry->bus);
+		if (err)
+			goto out;
+
+		/*
+		 * Now figure out which root-%d this belongs to
+		 * so we can associate resources with it.
+		 */
+		err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename,
+				   "root_num", "%d", &root_num);
+
+		if (err != 1)
+			goto out;
+
+		for (i = 0; i < root_num; i++) {
+			len = snprintf(str, sizeof(str), "root-%d", i);
+			if (unlikely(len >= (sizeof(str) - 1))) {
+				err = -ENOMEM;
+				goto out;
+			}
+
+			err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename,
+					   str, "%x:%x", &domain, &bus);
+			if (err != 2)
+				goto out;
+
+			/* Is this the one we just published? */
+			if (domain == cntrl_entry->domain &&
+			    bus == cntrl_entry->bus)
+				break;
+		}
+
+		if (i == root_num)
+			goto out;
+
+		info.pdev = pdev;
+		info.resource_count = 0;
+		info.root_num = i;
+
+		/* Let ACPI do the heavy lifting on decoding resources */
+		acpi_walk_resources(cntrl_entry->controller->acpi_handle,
+				    METHOD_NAME__CRS, write_xenbus_resource,
+				    &info);
+
+		/* No resources. OK. On to the next one */
+		if (!info.resource_count)
+			continue;
+
+		/* Store the number of resources we wrote for this root-%d */
+		len = snprintf(str, sizeof(str), "root-%d-resources", i);
+		if (unlikely(len >= (sizeof(str) - 1))) {
+			err = -ENOMEM;
+			goto out;
+		}
+
+		err = xenbus_printf(XBT_NIL, pdev->xdev->nodename, str,
+				    "%d", info.resource_count);
+		if (err)
+			goto out;
+	}
+
+	/* Finally, write some magic to synchronize with the guest. */
+	len = snprintf(str, sizeof(str), "root-resource-magic");
+	if (unlikely(len >= (sizeof(str) - 1))) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	err = xenbus_printf(XBT_NIL, pdev->xdev->nodename, str,
+			    "%lx", (sizeof(struct acpi_resource) * 2) + 1);
+
+out:
+	spin_unlock(&dev_data->lock);
+
+	return err;
+}
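+
+/*
+ * For reference, a hypothetical xenstore layout consumed and extended by
+ * the function above (the paths and values are invented examples, keyed
+ * relative to the backend device's nodename, assuming one controller
+ * that decodes two translated ranges):
+ *
+ *   .../root_num            = "1"
+ *   .../root-0              = "0000:00"
+ *   .../root-0-resource-0   = "<hex-encoded acpi_resource>"
+ *   .../root-0-resource-1   = "<hex-encoded acpi_resource>"
+ *   .../root-0-resources    = "2"
+ *   .../root-resource-magic = "<sizeof(struct acpi_resource) * 2 + 1>"
+ */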
+
+void pciback_release_devices(struct pciback_device *pdev)
+{
+	struct controller_dev_data *dev_data = pdev->pci_dev_data;
+	struct controller_list_entry *cntrl_entry, *c;
+	struct controller_dev_entry *dev_entry, *d;
+
+	list_for_each_entry_safe(cntrl_entry, c, &dev_data->list, list) {
+		list_for_each_entry_safe(dev_entry, d,
+					 &cntrl_entry->dev_list, list) {
+			list_del(&dev_entry->list);
+			pcistub_put_pci_dev(dev_entry->dev);
+			kfree(dev_entry);
+		}
+		list_del(&cntrl_entry->list);
+		kfree(cntrl_entry);
+	}
+
+	kfree(dev_data);
+	pdev->pci_dev_data = NULL;
+}
+
+int pciback_get_pcifront_dev(struct pci_dev *pcidev,
+			     struct pciback_device *pdev,
+			     unsigned int *domain, unsigned int *bus,
+			     unsigned int *devfn)
+{
+	struct controller_dev_data *dev_data = pdev->pci_dev_data;
+	struct controller_dev_entry *dev_entry;
+	struct controller_list_entry *cntrl_entry;
+	unsigned long flags;
+	int found = 0;
+
+	spin_lock_irqsave(&dev_data->lock, flags);
+
+	list_for_each_entry(cntrl_entry, &dev_data->list, list) {
+		list_for_each_entry(dev_entry, &cntrl_entry->dev_list, list) {
+			if ((dev_entry->dev->bus->number ==
+			     pcidev->bus->number) &&
+			    (dev_entry->dev->devfn ==
+			     pcidev->devfn) &&
+			    (pci_domain_nr(dev_entry->dev->bus) ==
+			     pci_domain_nr(pcidev->bus))) {
+				found = 1;
+				*domain = cntrl_entry->domain;
+				*bus = cntrl_entry->bus;
+				*devfn = dev_entry->devfn;
+				goto out;
+			}
+		}
+	}
+out:
+	spin_unlock_irqrestore(&dev_data->lock, flags);
+	return found;
+}
+
diff --git a/drivers/xen/pciback/passthrough.c b/drivers/xen/pciback/passthrough.c
new file mode 100644
index 0000000..5386bebf
--- /dev/null
+++ b/drivers/xen/pciback/passthrough.c
@@ -0,0 +1,178 @@
+/*
+ * PCI Backend - Provides restricted access to the real PCI bus topology
+ * to the frontend
+ *
+ * Author: Ryan Wilson <hap9@epoch.ncsc.mil>
+ */
+
+#include <linux/list.h>
+#include <linux/pci.h>
+#include <linux/spinlock.h>
+#include "pciback.h"
+
+struct passthrough_dev_data {
+	/* Access to dev_list must be protected by lock */
+	struct list_head dev_list;
+	spinlock_t lock;
+};
+
+struct pci_dev *pciback_get_pci_dev(struct pciback_device *pdev,
+				    unsigned int domain, unsigned int bus,
+				    unsigned int devfn)
+{
+	struct passthrough_dev_data *dev_data = pdev->pci_dev_data;
+	struct pci_dev_entry *dev_entry;
+	struct pci_dev *dev = NULL;
+	unsigned long flags;
+
+	spin_lock_irqsave(&dev_data->lock, flags);
+
+	list_for_each_entry(dev_entry, &dev_data->dev_list, list) {
+		if (domain == (unsigned int)pci_domain_nr(dev_entry->dev->bus)
+		    && bus == (unsigned int)dev_entry->dev->bus->number
+		    && devfn == dev_entry->dev->devfn) {
+			dev = dev_entry->dev;
+			break;
+		}
+	}
+
+	spin_unlock_irqrestore(&dev_data->lock, flags);
+
+	return dev;
+}
+
+int pciback_add_pci_dev(struct pciback_device *pdev, struct pci_dev *dev,
+			int devid, publish_pci_dev_cb publish_cb)
+{
+	struct passthrough_dev_data *dev_data = pdev->pci_dev_data;
+	struct pci_dev_entry *dev_entry;
+	unsigned long flags;
+	unsigned int domain, bus, devfn;
+	int err;
+
+	dev_entry = kmalloc(sizeof(*dev_entry), GFP_KERNEL);
+	if (!dev_entry)
+		return -ENOMEM;
+	dev_entry->dev = dev;
+
+	spin_lock_irqsave(&dev_data->lock, flags);
+	list_add_tail(&dev_entry->list, &dev_data->dev_list);
+	spin_unlock_irqrestore(&dev_data->lock, flags);
+
+	/* Publish this device. */
+	domain = (unsigned int)pci_domain_nr(dev->bus);
+	bus = (unsigned int)dev->bus->number;
+	devfn = dev->devfn;
+	err = publish_cb(pdev, domain, bus, devfn, devid);
+
+	return err;
+}
+
+void pciback_release_pci_dev(struct pciback_device *pdev, struct pci_dev *dev)
+{
+	struct passthrough_dev_data *dev_data = pdev->pci_dev_data;
+	struct pci_dev_entry *dev_entry, *t;
+	struct pci_dev *found_dev = NULL;
+	unsigned long flags;
+
+	spin_lock_irqsave(&dev_data->lock, flags);
+
+	list_for_each_entry_safe(dev_entry, t, &dev_data->dev_list, list) {
+		if (dev_entry->dev == dev) {
+			list_del(&dev_entry->list);
+			found_dev = dev_entry->dev;
+			kfree(dev_entry);
+		}
+	}
+
+	spin_unlock_irqrestore(&dev_data->lock, flags);
+
+	if (found_dev)
+		pcistub_put_pci_dev(found_dev);
+}
+
+int pciback_init_devices(struct pciback_device *pdev)
+{
+	struct passthrough_dev_data *dev_data;
+
+	dev_data = kmalloc(sizeof(*dev_data), GFP_KERNEL);
+	if (!dev_data)
+		return -ENOMEM;
+
+	spin_lock_init(&dev_data->lock);
+
+	INIT_LIST_HEAD(&dev_data->dev_list);
+
+	pdev->pci_dev_data = dev_data;
+
+	return 0;
+}
+
+int pciback_publish_pci_roots(struct pciback_device *pdev,
+			      publish_pci_root_cb publish_root_cb)
+{
+	int err = 0;
+	struct passthrough_dev_data *dev_data = pdev->pci_dev_data;
+	struct pci_dev_entry *dev_entry, *e;
+	struct pci_dev *dev;
+	int found;
+	unsigned int domain, bus;
+
+	spin_lock(&dev_data->lock);
+
+	list_for_each_entry(dev_entry, &dev_data->dev_list, list) {
+		/* Only publish this device as a root if none of its
+		 * parent bridges are exported
+		 */
+		found = 0;
+		dev = dev_entry->dev->bus->self;
+		for (; !found && dev != NULL; dev = dev->bus->self) {
+			list_for_each_entry(e, &dev_data->dev_list, list) {
+				if (dev == e->dev) {
+					found = 1;
+					break;
+				}
+			}
+		}
+
+		domain = (unsigned int)pci_domain_nr(dev_entry->dev->bus);
+		bus = (unsigned int)dev_entry->dev->bus->number;
+
+		if (!found) {
+			err = publish_root_cb(pdev, domain, bus);
+			if (err)
+				break;
+		}
+	}
+
+	spin_unlock(&dev_data->lock);
+
+	return err;
+}
+
+void pciback_release_devices(struct pciback_device *pdev)
+{
+	struct passthrough_dev_data *dev_data = pdev->pci_dev_data;
+	struct pci_dev_entry *dev_entry, *t;
+
+	list_for_each_entry_safe(dev_entry, t, &dev_data->dev_list, list) {
+		list_del(&dev_entry->list);
+		pcistub_put_pci_dev(dev_entry->dev);
+		kfree(dev_entry);
+	}
+
+	kfree(dev_data);
+	pdev->pci_dev_data = NULL;
+}
+
+int pciback_get_pcifront_dev(struct pci_dev *pcidev,
+			     struct pciback_device *pdev,
+			     unsigned int *domain, unsigned int *bus,
+			     unsigned int *devfn)
+{
+	*domain = pci_domain_nr(pcidev->bus);
+	*bus = pcidev->bus->number;
+	*devfn = pcidev->devfn;
+	return 1;
+}
diff --git a/drivers/xen/pciback/pci_stub.c b/drivers/xen/pciback/pci_stub.c
new file mode 100644
index 0000000..c4d1071
--- /dev/null
+++ b/drivers/xen/pciback/pci_stub.c
@@ -0,0 +1,1371 @@
+/*
+ * PCI Stub Driver - Grabs devices in backend to be exported later
+ *
+ * Ryan Wilson <hap9@epoch.ncsc.mil>
+ * Chris Bookholt <hap10@epoch.ncsc.mil>
+ */
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/rwsem.h>
+#include <linux/list.h>
+#include <linux/spinlock.h>
+#include <linux/kref.h>
+#include <linux/pci.h>
+#include <linux/wait.h>
+#include <linux/sched.h>
+#include <linux/atomic.h>
+#include <xen/events.h>
+#include <asm/xen/pci.h>
+#include <asm/xen/hypervisor.h>
+#include "pciback.h"
+#include "conf_space.h"
+#include "conf_space_quirks.h"
+
+#define DRV_NAME	"pciback"
+
+static char *pci_devs_to_hide;
+wait_queue_head_t aer_wait_queue;
+/*
+ * Add a semaphore to synchronize AER handling with pciback
+ * remove/reconfigure ops; we want to avoid pciback devices being
+ * removed in the middle of AER ops.
+ */
+static DECLARE_RWSEM(pcistub_sem);
+module_param_named(hide, pci_devs_to_hide, charp, 0444);
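+
+/*
+ * Example (the device addresses are hypothetical): to seize two devices
+ * at boot, list them in this parameter in the same "(BDF)" format parsed
+ * by pcistub_init() below, e.g. on the kernel command line:
+ *
+ *   pciback.hide=(0000:03:00.0)(04:00.0)
+ *
+ * The domain may be omitted, in which case it defaults to 0.
+ */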
+
+struct pcistub_device_id {
+	struct list_head slot_list;
+	int domain;
+	unsigned char bus;
+	unsigned int devfn;
+};
+static LIST_HEAD(pcistub_device_ids);
+static DEFINE_SPINLOCK(device_ids_lock);
+
+struct pcistub_device {
+	struct kref kref;
+	struct list_head dev_list;
+	spinlock_t lock;
+
+	struct pci_dev *dev;
+	struct pciback_device *pdev;/* non-NULL if struct pci_dev is in use */
+};
+
+/* Access to pcistub_devices & seized_devices lists and the initialize_devices
+ * flag must be locked with pcistub_devices_lock
+ */
+static DEFINE_SPINLOCK(pcistub_devices_lock);
+static LIST_HEAD(pcistub_devices);
+
+/* wait for device_initcall before initializing our devices
+ * (see pcistub_init_devices_late)
+ */
+static int initialize_devices;
+static LIST_HEAD(seized_devices);
+
+static struct pcistub_device *pcistub_device_alloc(struct pci_dev *dev)
+{
+	struct pcistub_device *psdev;
+
+	dev_dbg(&dev->dev, "pcistub_device_alloc\n");
+
+	psdev = kzalloc(sizeof(*psdev), GFP_ATOMIC);
+	if (!psdev)
+		return NULL;
+
+	psdev->dev = pci_dev_get(dev);
+	if (!psdev->dev) {
+		kfree(psdev);
+		return NULL;
+	}
+
+	kref_init(&psdev->kref);
+	spin_lock_init(&psdev->lock);
+
+	return psdev;
+}
+
+/* Don't call this directly as it's called by pcistub_device_put */
+static void pcistub_device_release(struct kref *kref)
+{
+	struct pcistub_device *psdev;
+
+	psdev = container_of(kref, struct pcistub_device, kref);
+
+	dev_dbg(&psdev->dev->dev, "pcistub_device_release\n");
+
+	xen_unregister_device_domain_owner(psdev->dev);
+
+	/* Clean-up the device */
+	pciback_reset_device(psdev->dev);
+	pciback_config_free_dyn_fields(psdev->dev);
+	pciback_config_free_dev(psdev->dev);
+	kfree(pci_get_drvdata(psdev->dev));
+	pci_set_drvdata(psdev->dev, NULL);
+
+	pci_dev_put(psdev->dev);
+
+	kfree(psdev);
+}
+
+static inline void pcistub_device_get(struct pcistub_device *psdev)
+{
+	kref_get(&psdev->kref);
+}
+
+static inline void pcistub_device_put(struct pcistub_device *psdev)
+{
+	kref_put(&psdev->kref, pcistub_device_release);
+}
+
+static struct pcistub_device *pcistub_device_find(int domain, int bus,
+						  int slot, int func)
+{
+	struct pcistub_device *psdev = NULL;
+	unsigned long flags;
+
+	spin_lock_irqsave(&pcistub_devices_lock, flags);
+
+	list_for_each_entry(psdev, &pcistub_devices, dev_list) {
+		if (psdev->dev != NULL
+		    && domain == pci_domain_nr(psdev->dev->bus)
+		    && bus == psdev->dev->bus->number
+		    && PCI_DEVFN(slot, func) == psdev->dev->devfn) {
+			pcistub_device_get(psdev);
+			goto out;
+		}
+	}
+
+	/* didn't find it */
+	psdev = NULL;
+
+out:
+	spin_unlock_irqrestore(&pcistub_devices_lock, flags);
+	return psdev;
+}
+
+static struct pci_dev *pcistub_device_get_pci_dev(struct pciback_device *pdev,
+						  struct pcistub_device *psdev)
+{
+	struct pci_dev *pci_dev = NULL;
+	unsigned long flags;
+
+	pcistub_device_get(psdev);
+
+	spin_lock_irqsave(&psdev->lock, flags);
+	if (!psdev->pdev) {
+		psdev->pdev = pdev;
+		pci_dev = psdev->dev;
+	}
+	spin_unlock_irqrestore(&psdev->lock, flags);
+
+	if (!pci_dev)
+		pcistub_device_put(psdev);
+
+	return pci_dev;
+}
+
+struct pci_dev *pcistub_get_pci_dev_by_slot(struct pciback_device *pdev,
+					    int domain, int bus,
+					    int slot, int func)
+{
+	struct pcistub_device *psdev;
+	struct pci_dev *found_dev = NULL;
+	unsigned long flags;
+
+	spin_lock_irqsave(&pcistub_devices_lock, flags);
+
+	list_for_each_entry(psdev, &pcistub_devices, dev_list) {
+		if (psdev->dev != NULL
+		    && domain == pci_domain_nr(psdev->dev->bus)
+		    && bus == psdev->dev->bus->number
+		    && PCI_DEVFN(slot, func) == psdev->dev->devfn) {
+			found_dev = pcistub_device_get_pci_dev(pdev, psdev);
+			break;
+		}
+	}
+
+	spin_unlock_irqrestore(&pcistub_devices_lock, flags);
+	return found_dev;
+}
+
+struct pci_dev *pcistub_get_pci_dev(struct pciback_device *pdev,
+				    struct pci_dev *dev)
+{
+	struct pcistub_device *psdev;
+	struct pci_dev *found_dev = NULL;
+	unsigned long flags;
+
+	spin_lock_irqsave(&pcistub_devices_lock, flags);
+
+	list_for_each_entry(psdev, &pcistub_devices, dev_list) {
+		if (psdev->dev == dev) {
+			found_dev = pcistub_device_get_pci_dev(pdev, psdev);
+			break;
+		}
+	}
+
+	spin_unlock_irqrestore(&pcistub_devices_lock, flags);
+	return found_dev;
+}
+
+void pcistub_put_pci_dev(struct pci_dev *dev)
+{
+	struct pcistub_device *psdev, *found_psdev = NULL;
+	unsigned long flags;
+
+	spin_lock_irqsave(&pcistub_devices_lock, flags);
+
+	list_for_each_entry(psdev, &pcistub_devices, dev_list) {
+		if (psdev->dev == dev) {
+			found_psdev = psdev;
+			break;
+		}
+	}
+
+	spin_unlock_irqrestore(&pcistub_devices_lock, flags);
+
+	/* Hold this lock to avoid breaking the link between pcistub
+	 * and pciback while AER handling is in progress.
+	 */
+	down_write(&pcistub_sem);
+	/* Cleanup our device
+	 * (so it's ready for the next domain)
+	 */
+	pciback_reset_device(found_psdev->dev);
+	pciback_config_free_dyn_fields(found_psdev->dev);
+	pciback_config_reset_dev(found_psdev->dev);
+
+	spin_lock_irqsave(&found_psdev->lock, flags);
+	found_psdev->pdev = NULL;
+	spin_unlock_irqrestore(&found_psdev->lock, flags);
+
+	pcistub_device_put(found_psdev);
+	up_write(&pcistub_sem);
+}
+
+static int __devinit pcistub_match_one(struct pci_dev *dev,
+				       struct pcistub_device_id *pdev_id)
+{
+	/* Match the specified device by domain, bus, slot, func and also if
+	 * any of the device's parent bridges match.
+	 */
+	for (; dev != NULL; dev = dev->bus->self) {
+		if (pci_domain_nr(dev->bus) == pdev_id->domain
+		    && dev->bus->number == pdev_id->bus
+		    && dev->devfn == pdev_id->devfn)
+			return 1;
+
+		/* Sometimes topmost bridge links to itself. */
+		if (dev == dev->bus->self)
+			break;
+	}
+
+	return 0;
+}
+
+static int __devinit pcistub_match(struct pci_dev *dev)
+{
+	struct pcistub_device_id *pdev_id;
+	unsigned long flags;
+	int found = 0;
+
+	spin_lock_irqsave(&device_ids_lock, flags);
+	list_for_each_entry(pdev_id, &pcistub_device_ids, slot_list) {
+		if (pcistub_match_one(dev, pdev_id)) {
+			found = 1;
+			break;
+		}
+	}
+	spin_unlock_irqrestore(&device_ids_lock, flags);
+
+	return found;
+}
+
+static int __devinit pcistub_init_device(struct pci_dev *dev)
+{
+	struct pciback_dev_data *dev_data;
+	int err = 0;
+
+	dev_dbg(&dev->dev, "initializing...\n");
+
+	/* The PCI backend is not intended to be a module (or to work with
+	 * removable PCI devices) yet. If it were, pciback_config_free()
+	 * would need to be called somewhere to free the memory allocated
+	 * here, followed by kfree(pci_get_drvdata(psdev->dev)).
+	 */
+	dev_data = kzalloc(sizeof(*dev_data) + strlen(DRV_NAME "[]")
+			   + strlen(pci_name(dev)) + 1, GFP_ATOMIC);
+	if (!dev_data) {
+		err = -ENOMEM;
+		goto out;
+	}
+	pci_set_drvdata(dev, dev_data);
+
+	/*
+	 * Setup name for fake IRQ handler. It will only be enabled
+	 * once the device is turned on by the guest.
+	 */
+	sprintf(dev_data->irq_name, DRV_NAME "[%s]", pci_name(dev));
+
+	dev_dbg(&dev->dev, "initializing config\n");
+
+	init_waitqueue_head(&aer_wait_queue);
+	err = pciback_config_init_dev(dev);
+	if (err)
+		goto out;
+
+	/* HACK: Force device (& ACPI) to determine what IRQ it's on - we
+	 * must do this here because pcibios_enable_device may specify
+	 * the pci device's true irq (and possibly its other resources)
+	 * if they differ from what's in the configuration space.
+	 * This makes the assumption that the device's resources won't
+	 * change after this point (otherwise this code may break!)
+	 */
+	dev_dbg(&dev->dev, "enabling device\n");
+	err = pci_enable_device(dev);
+	if (err)
+		goto config_release;
+
+	/* Now disable the device (this also ensures some private device
+	 * data is setup before we export)
+	 */
+	dev_dbg(&dev->dev, "reset device\n");
+	pciback_reset_device(dev);
+
+	return 0;
+
+config_release:
+	pciback_config_free_dev(dev);
+
+out:
+	pci_set_drvdata(dev, NULL);
+	kfree(dev_data);
+	return err;
+}
+
+/*
+ * Because some initialization still happens on
+ * devices during fs_initcall, we need to defer
+ * full initialization of our devices until
+ * device_initcall.
+ */
+static int __init pcistub_init_devices_late(void)
+{
+	struct pcistub_device *psdev;
+	unsigned long flags;
+	int err = 0;
+
+	pr_debug("pciback: pcistub_init_devices_late\n");
+
+	spin_lock_irqsave(&pcistub_devices_lock, flags);
+
+	while (!list_empty(&seized_devices)) {
+		psdev = container_of(seized_devices.next,
+				     struct pcistub_device, dev_list);
+		list_del(&psdev->dev_list);
+
+		spin_unlock_irqrestore(&pcistub_devices_lock, flags);
+
+		err = pcistub_init_device(psdev->dev);
+		if (err) {
+			dev_err(&psdev->dev->dev,
+				"error %d initializing device\n", err);
+			kfree(psdev);
+			psdev = NULL;
+		}
+
+		spin_lock_irqsave(&pcistub_devices_lock, flags);
+
+		if (psdev)
+			list_add_tail(&psdev->dev_list, &pcistub_devices);
+	}
+
+	initialize_devices = 1;
+
+	spin_unlock_irqrestore(&pcistub_devices_lock, flags);
+
+	return 0;
+}
+
+static int __devinit pcistub_seize(struct pci_dev *dev)
+{
+	struct pcistub_device *psdev;
+	unsigned long flags;
+	int err = 0;
+
+	psdev = pcistub_device_alloc(dev);
+	if (!psdev)
+		return -ENOMEM;
+
+	spin_lock_irqsave(&pcistub_devices_lock, flags);
+
+	if (initialize_devices) {
+		spin_unlock_irqrestore(&pcistub_devices_lock, flags);
+
+		/* don't want irqs disabled when calling pcistub_init_device */
+		err = pcistub_init_device(psdev->dev);
+
+		spin_lock_irqsave(&pcistub_devices_lock, flags);
+
+		if (!err)
+			list_add(&psdev->dev_list, &pcistub_devices);
+	} else {
+		dev_dbg(&dev->dev, "deferring initialization\n");
+		list_add(&psdev->dev_list, &seized_devices);
+	}
+
+	spin_unlock_irqrestore(&pcistub_devices_lock, flags);
+
+	if (err)
+		pcistub_device_put(psdev);
+
+	return err;
+}
+
+static int __devinit pcistub_probe(struct pci_dev *dev,
+				   const struct pci_device_id *id)
+{
+	int err = 0;
+
+	dev_dbg(&dev->dev, "probing...\n");
+
+	if (pcistub_match(dev)) {
+
+		if (dev->hdr_type != PCI_HEADER_TYPE_NORMAL
+		    && dev->hdr_type != PCI_HEADER_TYPE_BRIDGE) {
+			dev_err(&dev->dev, "can't export pci devices that "
+				"don't have a normal (0) or bridge (1) "
+				"header type!\n");
+			err = -ENODEV;
+			goto out;
+		}
+
+		dev_info(&dev->dev, "seizing device\n");
+		err = pcistub_seize(dev);
+	} else
+		/* Didn't find the device */
+		err = -ENODEV;
+
+out:
+	return err;
+}
+
+static void pcistub_remove(struct pci_dev *dev)
+{
+	struct pcistub_device *psdev, *found_psdev = NULL;
+	unsigned long flags;
+
+	dev_dbg(&dev->dev, "removing\n");
+
+	spin_lock_irqsave(&pcistub_devices_lock, flags);
+
+	pciback_config_quirk_release(dev);
+
+	list_for_each_entry(psdev, &pcistub_devices, dev_list) {
+		if (psdev->dev == dev) {
+			found_psdev = psdev;
+			break;
+		}
+	}
+
+	spin_unlock_irqrestore(&pcistub_devices_lock, flags);
+
+	if (found_psdev) {
+		dev_dbg(&dev->dev, "found device to remove - in use? %p\n",
+			found_psdev->pdev);
+
+		if (found_psdev->pdev) {
+			printk(KERN_WARNING "pciback: ****** removing device "
+			       "%s while still in-use! ******\n",
+			       pci_name(found_psdev->dev));
+			printk(KERN_WARNING "pciback: ****** driver domain may "
+			       "still access this device's i/o resources!\n");
+			printk(KERN_WARNING "pciback: ****** shutdown driver "
+			       "domain before binding device\n");
+			printk(KERN_WARNING "pciback: ****** to other drivers "
+			       "or domains\n");
+
+			pciback_release_pci_dev(found_psdev->pdev,
+						found_psdev->dev);
+		}
+
+		spin_lock_irqsave(&pcistub_devices_lock, flags);
+		list_del(&found_psdev->dev_list);
+		spin_unlock_irqrestore(&pcistub_devices_lock, flags);
+
+		/* the final put for releasing from the list */
+		pcistub_device_put(found_psdev);
+	}
+}
+
+static DEFINE_PCI_DEVICE_TABLE(pcistub_ids) = {
+	{
+	 .vendor = PCI_ANY_ID,
+	 .device = PCI_ANY_ID,
+	 .subvendor = PCI_ANY_ID,
+	 .subdevice = PCI_ANY_ID,
+	 },
+	{0,},
+};
+
+#define PCI_NODENAME_MAX 40
+static void kill_domain_by_device(struct pcistub_device *psdev)
+{
+	struct xenbus_transaction xbt;
+	int err;
+	char nodename[PCI_NODENAME_MAX];
+
+	if (!psdev) {
+		/* Can't use dev_err() here: psdev is NULL. */
+		pr_err("pciback: device is NULL when doing AER recovery/kill_domain\n");
+		return;
+	}
+	snprintf(nodename, PCI_NODENAME_MAX, "/local/domain/0/backend/pci/%d/0",
+		 psdev->pdev->xdev->otherend_id);
+	nodename[strlen(nodename)] = '\0';
+
+again:
+	err = xenbus_transaction_start(&xbt);
+	if (err) {
+		dev_err(&psdev->dev->dev,
+			"error %d when starting xenbus transaction\n", err);
+		return;
+	}
+	/* PV AER handlers will set this flag */
+	xenbus_printf(xbt, nodename, "aerState", "aerfail");
+	err = xenbus_transaction_end(xbt, 0);
+	if (err) {
+		if (err == -EAGAIN)
+			goto again;
+		dev_err(&psdev->dev->dev,
+			"error %d when ending xenbus transaction\n", err);
+		return;
+	}
+}
+
+/* For each AER recovery step (error_detected, mmio_enabled, etc.), the
+ * frontend and backend need to cooperate. In pciback, those steps all do
+ * a similar job: send a service request and wait for the frontend's
+ * response.
+ */
+static pci_ers_result_t common_process(struct pcistub_device *psdev,
+		pci_channel_state_t state, int aer_cmd, pci_ers_result_t result)
+{
+	pci_ers_result_t res = result;
+	struct xen_pcie_aer_op *aer_op;
+	int ret;
+
+	/* with PV AER drivers */
+	aer_op = &(psdev->pdev->sh_info->aer_op);
+	aer_op->cmd = aer_cmd;
+	/* useful for the error_detected callback */
+	aer_op->err = state;
+	/* pcifront_end BDF */
+	ret = pciback_get_pcifront_dev(psdev->dev, psdev->pdev,
+				       &aer_op->domain, &aer_op->bus,
+				       &aer_op->devfn);
+	if (!ret) {
+		dev_err(&psdev->dev->dev,
+			"pciback: failed to get pcifront device\n");
+		return PCI_ERS_RESULT_NONE;
+	}
+	wmb();
+
+	dev_dbg(&psdev->dev->dev,
+		"pciback: aer_op %x dom %x bus %x devfn %x\n",
+		aer_cmd, aer_op->domain, aer_op->bus, aer_op->devfn);
+	/* Local flag to mark that an AER request is pending; the pciback
+	 * callback will use this flag to judge whether we need to check
+	 * for pcifront's AER service ack signal.
+	 */
+	set_bit(_PCIB_op_pending, (unsigned long *)&psdev->pdev->flags);
+
+	/* It is possible that a pcifront conf_read_write ops request
+	 * invokes the callback, which causes a spurious execution of
+	 * wake_up. Yet it is harmless and better than a spinlock here.
+	 */
+	set_bit(_XEN_PCIB_active,
+		(unsigned long *)&psdev->pdev->sh_info->flags);
+	wmb();
+	notify_remote_via_irq(psdev->pdev->evtchn_irq);
+
+	ret = wait_event_timeout(aer_wait_queue, !(test_bit(_XEN_PCIB_active,
+		(unsigned long *)&psdev->pdev->sh_info->flags)), 300*HZ);
+
+	if (!ret) {
+		if (test_bit(_XEN_PCIB_active,
+			(unsigned long *)&psdev->pdev->sh_info->flags)) {
+			dev_err(&psdev->dev->dev,
+				"pcifront aer process not responding!\n");
+			clear_bit(_XEN_PCIB_active,
+				  (unsigned long *)&psdev->pdev->sh_info->flags);
+			aer_op->err = PCI_ERS_RESULT_NONE;
+			return res;
+		}
+	}
+	clear_bit(_PCIB_op_pending, (unsigned long *)&psdev->pdev->flags);
+
+	if (test_bit(_XEN_PCIF_active,
+		(unsigned long *)&psdev->pdev->sh_info->flags)) {
+		dev_dbg(&psdev->dev->dev,
+			"schedule pci_conf service in pciback\n");
+		test_and_schedule_op(psdev->pdev);
+	}
+
+	res = (pci_ers_result_t)aer_op->err;
+	return res;
+}
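+
+/*
+ * A rough sketch of the handshake implemented by common_process() (the
+ * flag and field names are the real ones used above; the sequencing
+ * notes are an editorial summary, not normative):
+ *
+ *   backend                                   frontend
+ *   -------                                   --------
+ *   fill sh_info->aer_op (cmd/err/BDF)
+ *   set _PCIB_op_pending in pdev->flags
+ *   set _XEN_PCIB_active in sh_info->flags
+ *   notify_remote_via_irq()             --->  run the AER handler
+ *                                       <---  clear _XEN_PCIB_active,
+ *                                             write result to aer_op->err
+ *   wait_event_timeout(aer_wait_queue,
+ *                      !_XEN_PCIB_active, 300*HZ)
+ *   read the result back from aer_op->err
+ */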
+
+/*
+ * pciback_slot_reset: send the slot_reset request to pcifront in case the
+ * device driver provides this service, and then wait for pcifront's ack.
+ * @dev: pointer to the PCI device
+ * The return value is used by the aer_core do_recovery policy.
+ */
+static pci_ers_result_t pciback_slot_reset(struct pci_dev *dev)
+{
+	struct pcistub_device *psdev;
+	pci_ers_result_t result;
+
+	result = PCI_ERS_RESULT_RECOVERED;
+	dev_dbg(&dev->dev, "pciback_slot_reset(bus:%x,devfn:%x)\n",
+		dev->bus->number, dev->devfn);
+
+	down_write(&pcistub_sem);
+	psdev = pcistub_device_find(pci_domain_nr(dev->bus),
+				    dev->bus->number,
+				    PCI_SLOT(dev->devfn),
+				    PCI_FUNC(dev->devfn));
+
+	if (!psdev || !psdev->pdev) {
+		dev_err(&dev->dev,
+			"pciback device is not found/assigned\n");
+		goto end;
+	}
+
+	if (!psdev->pdev->sh_info) {
+		dev_err(&dev->dev, "pciback device is not connected or owned"
+			" by HVM, kill it\n");
+		kill_domain_by_device(psdev);
+		goto release;
+	}
+
+	if (!test_bit(_XEN_PCIB_AERHANDLER,
+		(unsigned long *)&psdev->pdev->sh_info->flags)) {
+		dev_err(&dev->dev,
+			"guest with no AER driver should have been killed\n");
+		goto release;
+	}
+	result = common_process(psdev, 1, XEN_PCI_OP_aer_slotreset, result);
+
+	if (result == PCI_ERS_RESULT_NONE ||
+	    result == PCI_ERS_RESULT_DISCONNECT) {
+		dev_dbg(&dev->dev,
+			"No AER slot_reset service or disconnected!\n");
+		kill_domain_by_device(psdev);
+	}
+release:
+	pcistub_device_put(psdev);
+end:
+	up_write(&pcistub_sem);
+	return result;
+}
+
+/*
+ * pciback_mmio_enabled: send the mmio_enabled request to pcifront in case
+ * the device driver provides this service, and then wait for pcifront's
+ * ack.
+ * @dev: pointer to the PCI device
+ * The return value is used by the aer_core do_recovery policy.
+ */
+static pci_ers_result_t pciback_mmio_enabled(struct pci_dev *dev)
+{
+	struct pcistub_device *psdev;
+	pci_ers_result_t result;
+
+	result = PCI_ERS_RESULT_RECOVERED;
+	dev_dbg(&dev->dev, "pciback_mmio_enabled(bus:%x,devfn:%x)\n",
+		dev->bus->number, dev->devfn);
+
+	down_write(&pcistub_sem);
+	psdev = pcistub_device_find(pci_domain_nr(dev->bus),
+				    dev->bus->number,
+				    PCI_SLOT(dev->devfn),
+				    PCI_FUNC(dev->devfn));
+
+	if (!psdev || !psdev->pdev) {
+		dev_err(&dev->dev,
+			"pciback device is not found/assigned\n");
+		goto end;
+	}
+
+	if (!psdev->pdev->sh_info) {
+		dev_err(&dev->dev, "pciback device is not connected or owned"
+			" by HVM, kill it\n");
+		kill_domain_by_device(psdev);
+		goto release;
+	}
+
+	if (!test_bit(_XEN_PCIB_AERHANDLER,
+		(unsigned long *)&psdev->pdev->sh_info->flags)) {
+		dev_err(&dev->dev,
+			"guest with no AER driver should have been killed\n");
+		goto release;
+	}
+	result = common_process(psdev, 1, XEN_PCI_OP_aer_mmio, result);
+
+	if (result == PCI_ERS_RESULT_NONE ||
+	    result == PCI_ERS_RESULT_DISCONNECT) {
+		dev_dbg(&dev->dev,
+			"No AER mmio_enabled service or disconnected!\n");
+		kill_domain_by_device(psdev);
+	}
+release:
+	pcistub_device_put(psdev);
+end:
+	up_write(&pcistub_sem);
+	return result;
+}
+
+/*
+ * pciback_error_detected: send the error_detected request to pcifront in
+ * case the device driver provides this service, and then wait for
+ * pcifront's ack.
+ * @dev: pointer to the PCI device
+ * @error: the current PCI connection state
+ * The return value is used by the aer_core do_recovery policy.
+ */
+static pci_ers_result_t pciback_error_detected(struct pci_dev *dev,
+					       pci_channel_state_t error)
+{
+	struct pcistub_device *psdev;
+	pci_ers_result_t result;
+
+	result = PCI_ERS_RESULT_CAN_RECOVER;
+	dev_dbg(&dev->dev, "pciback_error_detected(bus:%x,devfn:%x)\n",
+		dev->bus->number, dev->devfn);
+
+	down_write(&pcistub_sem);
+	psdev = pcistub_device_find(pci_domain_nr(dev->bus),
+				    dev->bus->number,
+				    PCI_SLOT(dev->devfn),
+				    PCI_FUNC(dev->devfn));
+
+	if (!psdev || !psdev->pdev) {
+		dev_err(&dev->dev,
+			"pciback device is not found/assigned\n");
+		goto end;
+	}
+
+	if (!psdev->pdev->sh_info) {
+		dev_err(&dev->dev, "pciback device is not connected or owned"
+			" by HVM, kill it\n");
+		kill_domain_by_device(psdev);
+		goto release;
+	}
+
+	/* Guest owns the device yet no AER handler registered; kill guest */
+	if (!test_bit(_XEN_PCIB_AERHANDLER,
+		(unsigned long *)&psdev->pdev->sh_info->flags)) {
+		dev_dbg(&dev->dev, "guest may have no aer driver, kill it\n");
+		kill_domain_by_device(psdev);
+		goto release;
+	}
+	result = common_process(psdev, error, XEN_PCI_OP_aer_detected, result);
+
+	if (result == PCI_ERS_RESULT_NONE ||
+	    result == PCI_ERS_RESULT_DISCONNECT) {
+		dev_dbg(&dev->dev,
+			"No AER error_detected service or disconnected!\n");
+		kill_domain_by_device(psdev);
+	}
+release:
+	pcistub_device_put(psdev);
+end:
+	up_write(&pcistub_sem);
+	return result;
+}
+
+/*
+ * pciback_error_resume: send the error_resume request to pcifront in case
+ * the device driver provides this service, and then wait for pcifront's
+ * ack.
+ * @dev: pointer to the PCI device
+ */
+static void pciback_error_resume(struct pci_dev *dev)
+{
+	struct pcistub_device *psdev;
+
+	dev_dbg(&dev->dev, "pciback_error_resume(bus:%x,devfn:%x)\n",
+		dev->bus->number, dev->devfn);
+
+	down_write(&pcistub_sem);
+	psdev = pcistub_device_find(pci_domain_nr(dev->bus),
+				    dev->bus->number,
+				    PCI_SLOT(dev->devfn),
+				    PCI_FUNC(dev->devfn));
+
+	if (!psdev || !psdev->pdev) {
+		dev_err(&dev->dev,
+			"pciback device is not found/assigned\n");
+		goto end;
+	}
+
+	if (!psdev->pdev->sh_info) {
+		dev_err(&dev->dev, "pciback device is not connected or owned"
+			" by HVM, kill it\n");
+		kill_domain_by_device(psdev);
+		goto release;
+	}
+
+	if (!test_bit(_XEN_PCIB_AERHANDLER,
+		(unsigned long *)&psdev->pdev->sh_info->flags)) {
+		dev_err(&dev->dev,
+			"guest with no AER driver should have been killed\n");
+		kill_domain_by_device(psdev);
+		goto release;
+	}
+	common_process(psdev, 1, XEN_PCI_OP_aer_resume,
+		       PCI_ERS_RESULT_RECOVERED);
+release:
+	pcistub_device_put(psdev);
+end:
+	up_write(&pcistub_sem);
+	return;
+}
+
+/* add pciback AER handling */
+static struct pci_error_handlers pciback_error_handler = {
+	.error_detected = pciback_error_detected,
+	.mmio_enabled = pciback_mmio_enabled,
+	.slot_reset = pciback_slot_reset,
+	.resume = pciback_error_resume,
+};
+
+/*
+ * Note: There is no MODULE_DEVICE_TABLE entry here because this isn't
+ * for a normal device. I don't want it to be loaded automatically.
+ */
+
+static struct pci_driver pciback_pci_driver = {
+	.name = DRV_NAME,
+	.id_table = pcistub_ids,
+	.probe = pcistub_probe,
+	.remove = pcistub_remove,
+	.err_handler = &pciback_error_handler,
+};
+
+static inline int str_to_slot(const char *buf, int *domain, int *bus,
+			      int *slot, int *func)
+{
+	int err;
+
+	err = sscanf(buf, " %x:%x:%x.%x", domain, bus, slot, func);
+	if (err == 4)
+		return 0;
+	else if (err < 0)
+		return -EINVAL;
+
+	/* try again without domain */
+	*domain = 0;
+	err = sscanf(buf, " %x:%x.%x", bus, slot, func);
+	if (err == 3)
+		return 0;
+
+	return -EINVAL;
+}
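+
+/*
+ * Example inputs accepted by str_to_slot() (the addresses are
+ * hypothetical):
+ *
+ *   "0000:03:00.0" -> domain=0x0, bus=0x03, slot=0x00, func=0x0
+ *   "03:00.0"      -> same, with the domain defaulting to 0
+ *
+ * This is the format expected by the new_slot/remove_slot sysfs
+ * attributes defined further down.
+ */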
+
+static inline int str_to_quirk(const char *buf, int *domain, int *bus, int
+			       *slot, int *func, int *reg, int *size, int *mask)
+{
+	int err;
+
+	err =
+	    sscanf(buf, " %04x:%02x:%02x.%1x-%08x:%1x:%08x", domain, bus, slot,
+		   func, reg, size, mask);
+	if (err == 7)
+		return 0;
+	return -EINVAL;
+}
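+
+/*
+ * Example quirk string (all values hypothetical), in the
+ * dddd:bb:ss.f-reg:size:mask form parsed above:
+ *
+ *   "0000:03:00.0-00000040:2:0000ffff"
+ *
+ * i.e. expose a 2-byte field at config offset 0x40 of device
+ * 0000:03:00.0, with access mask 0x0000ffff.
+ */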
+
+static int pcistub_device_id_add(int domain, int bus, int slot, int func)
+{
+	struct pcistub_device_id *pci_dev_id;
+	unsigned long flags;
+
+	pci_dev_id = kmalloc(sizeof(*pci_dev_id), GFP_KERNEL);
+	if (!pci_dev_id)
+		return -ENOMEM;
+
+	pci_dev_id->domain = domain;
+	pci_dev_id->bus = bus;
+	pci_dev_id->devfn = PCI_DEVFN(slot, func);
+
+	pr_debug("pciback: wants to seize %04x:%02x:%02x.%01x\n",
+		 domain, bus, slot, func);
+
+	spin_lock_irqsave(&device_ids_lock, flags);
+	list_add_tail(&pci_dev_id->slot_list, &pcistub_device_ids);
+	spin_unlock_irqrestore(&device_ids_lock, flags);
+
+	return 0;
+}
+
+static int pcistub_device_id_remove(int domain, int bus, int slot, int func)
+{
+	struct pcistub_device_id *pci_dev_id, *t;
+	int devfn = PCI_DEVFN(slot, func);
+	int err = -ENOENT;
+	unsigned long flags;
+
+	spin_lock_irqsave(&device_ids_lock, flags);
+	list_for_each_entry_safe(pci_dev_id, t, &pcistub_device_ids,
+				 slot_list) {
+		if (pci_dev_id->domain == domain
+		    && pci_dev_id->bus == bus && pci_dev_id->devfn == devfn) {
+			/* Don't break; here because it's possible the same
+			 * slot could be in the list more than once
+			 */
+			list_del(&pci_dev_id->slot_list);
+			kfree(pci_dev_id);
+
+			err = 0;
+
+			pr_debug("pciback: removed %04x:%02x:%02x.%01x from "
+				 "seize list\n", domain, bus, slot, func);
+		}
+	}
+	spin_unlock_irqrestore(&device_ids_lock, flags);
+
+	return err;
+}
+
+static int pcistub_reg_add(int domain, int bus, int slot, int func, int reg,
+			   int size, int mask)
+{
+	int err = 0;
+	struct pcistub_device *psdev;
+	struct pci_dev *dev;
+	struct config_field *field;
+
+	psdev = pcistub_device_find(domain, bus, slot, func);
+	if (!psdev || !psdev->dev) {
+		err = -ENODEV;
+		goto out;
+	}
+	dev = psdev->dev;
+
+	field = kzalloc(sizeof(*field), GFP_ATOMIC);
+	if (!field) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	field->offset = reg;
+	field->size = size;
+	field->mask = mask;
+	field->init = NULL;
+	field->reset = NULL;
+	field->release = NULL;
+	field->clean = pciback_config_field_free;
+
+	err = pciback_config_quirks_add_field(dev, field);
+	if (err)
+		kfree(field);
+out:
+	return err;
+}
+
+static ssize_t pcistub_slot_add(struct device_driver *drv, const char *buf,
+				size_t count)
+{
+	int domain, bus, slot, func;
+	int err;
+
+	err = str_to_slot(buf, &domain, &bus, &slot, &func);
+	if (err)
+		goto out;
+
+	err = pcistub_device_id_add(domain, bus, slot, func);
+
+out:
+	if (!err)
+		err = count;
+	return err;
+}
+
+DRIVER_ATTR(new_slot, S_IWUSR, NULL, pcistub_slot_add);
+
+static ssize_t pcistub_slot_remove(struct device_driver *drv, const char *buf,
+				   size_t count)
+{
+	int domain, bus, slot, func;
+	int err;
+
+	err = str_to_slot(buf, &domain, &bus, &slot, &func);
+	if (err)
+		goto out;
+
+	err = pcistub_device_id_remove(domain, bus, slot, func);
+
+out:
+	if (!err)
+		err = count;
+	return err;
+}
+
+DRIVER_ATTR(remove_slot, S_IWUSR, NULL, pcistub_slot_remove);
+
+static ssize_t pcistub_slot_show(struct device_driver *drv, char *buf)
+{
+	struct pcistub_device_id *pci_dev_id;
+	size_t count = 0;
+	unsigned long flags;
+
+	spin_lock_irqsave(&device_ids_lock, flags);
+	list_for_each_entry(pci_dev_id, &pcistub_device_ids, slot_list) {
+		if (count >= PAGE_SIZE)
+			break;
+
+		count += scnprintf(buf + count, PAGE_SIZE - count,
+				   "%04x:%02x:%02x.%01x\n",
+				   pci_dev_id->domain, pci_dev_id->bus,
+				   PCI_SLOT(pci_dev_id->devfn),
+				   PCI_FUNC(pci_dev_id->devfn));
+	}
+	spin_unlock_irqrestore(&device_ids_lock, flags);
+
+	return count;
+}
+
+DRIVER_ATTR(slots, S_IRUSR, pcistub_slot_show, NULL);
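+
+/*
+ * Sysfs usage sketch (the paths assume the driver registered under
+ * DRV_NAME "pciback"; the device address is hypothetical):
+ *
+ *   echo "0000:03:00.0" > /sys/bus/pci/drivers/pciback/new_slot
+ *   cat /sys/bus/pci/drivers/pciback/slots
+ *   echo "0000:03:00.0" > /sys/bus/pci/drivers/pciback/remove_slot
+ */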
+
+static ssize_t pcistub_irq_handler_show(struct device_driver *drv, char *buf)
+{
+	struct pcistub_device *psdev;
+	struct pciback_dev_data *dev_data;
+	size_t count = 0;
+	unsigned long flags;
+
+	spin_lock_irqsave(&pcistub_devices_lock, flags);
+	list_for_each_entry(psdev, &pcistub_devices, dev_list) {
+		if (count >= PAGE_SIZE)
+			break;
+		if (!psdev->dev)
+			continue;
+		dev_data = pci_get_drvdata(psdev->dev);
+		if (!dev_data)
+			continue;
+		count +=
+		    scnprintf(buf + count, PAGE_SIZE - count,
+			      "%s:%s:%sing:%ld\n",
+			      pci_name(psdev->dev),
+			      dev_data->isr_on ? "on" : "off",
+			      dev_data->ack_intr ? "ack" : "not ack",
+			      dev_data->handled);
+	}
+	spin_unlock_irqrestore(&pcistub_devices_lock, flags);
+	return count;
+}
+
+DRIVER_ATTR(irq_handlers, S_IRUSR, pcistub_irq_handler_show, NULL);
+
+static ssize_t pcistub_irq_handler_switch(struct device_driver *drv,
+					  const char *buf,
+					  size_t count)
+{
+	struct pcistub_device *psdev;
+	struct pciback_dev_data *dev_data;
+	int domain, bus, slot, func;
+	int err = -ENOENT;
+
+	err = str_to_slot(buf, &domain, &bus, &slot, &func);
+	if (err)
+		goto out;
+
+	psdev = pcistub_device_find(domain, bus, slot, func);
+
+	if (!psdev)
+		goto out;
+
+	dev_data = pci_get_drvdata(psdev->dev);
+	if (!dev_data)
+		goto out;
+
+	dev_dbg(&psdev->dev->dev, "%s fake irq handler: %d->%d\n",
+		dev_data->irq_name, dev_data->isr_on,
+		!dev_data->isr_on);
+
+	dev_data->isr_on = !(dev_data->isr_on);
+	if (dev_data->isr_on)
+		dev_data->ack_intr = 1;
+out:
+	if (!err)
+		err = count;
+	return err;
+}
+DRIVER_ATTR(irq_handler_state, S_IWUSR, NULL, pcistub_irq_handler_switch);
+
+static ssize_t pcistub_quirk_add(struct device_driver *drv, const char *buf,
+				 size_t count)
+{
+	int domain, bus, slot, func, reg, size, mask;
+	int err;
+
+	err = str_to_quirk(buf, &domain, &bus, &slot, &func, &reg, &size,
+			   &mask);
+	if (err)
+		goto out;
+
+	err = pcistub_reg_add(domain, bus, slot, func, reg, size, mask);
+
+out:
+	if (!err)
+		err = count;
+	return err;
+}
+
+static ssize_t pcistub_quirk_show(struct device_driver *drv, char *buf)
+{
+	int count = 0;
+	unsigned long flags;
+	struct pciback_config_quirk *quirk;
+	struct pciback_dev_data *dev_data;
+	const struct config_field *field;
+	const struct config_field_entry *cfg_entry;
+
+	spin_lock_irqsave(&device_ids_lock, flags);
+	list_for_each_entry(quirk, &pciback_quirks, quirks_list) {
+		if (count >= PAGE_SIZE)
+			goto out;
+
+		count += scnprintf(buf + count, PAGE_SIZE - count,
+				   "%02x:%02x.%01x\n\t%04x:%04x:%04x:%04x\n",
+				   quirk->pdev->bus->number,
+				   PCI_SLOT(quirk->pdev->devfn),
+				   PCI_FUNC(quirk->pdev->devfn),
+				   quirk->devid.vendor, quirk->devid.device,
+				   quirk->devid.subvendor,
+				   quirk->devid.subdevice);
+
+		dev_data = pci_get_drvdata(quirk->pdev);
+
+		list_for_each_entry(cfg_entry, &dev_data->config_fields, list) {
+			field = cfg_entry->field;
+			if (count >= PAGE_SIZE)
+				goto out;
+
+			count += scnprintf(buf + count, PAGE_SIZE - count,
+					   "\t\t%08x:%01x:%08x\n",
+					   cfg_entry->base_offset +
+					   field->offset, field->size,
+					   field->mask);
+		}
+	}
+
+out:
+	spin_unlock_irqrestore(&device_ids_lock, flags);
+
+	return count;
+}
+
+DRIVER_ATTR(quirks, S_IRUSR | S_IWUSR, pcistub_quirk_show, pcistub_quirk_add);
+
+static ssize_t permissive_add(struct device_driver *drv, const char *buf,
+			      size_t count)
+{
+	int domain, bus, slot, func;
+	int err;
+	struct pcistub_device *psdev;
+	struct pciback_dev_data *dev_data;
+	err = str_to_slot(buf, &domain, &bus, &slot, &func);
+	if (err)
+		goto out;
+	psdev = pcistub_device_find(domain, bus, slot, func);
+	if (!psdev) {
+		err = -ENODEV;
+		goto out;
+	}
+	if (!psdev->dev) {
+		err = -ENODEV;
+		goto release;
+	}
+	dev_data = pci_get_drvdata(psdev->dev);
+	/* the driver data for a device should never be null at this point */
+	if (!dev_data) {
+		err = -ENXIO;
+		goto release;
+	}
+	if (!dev_data->permissive) {
+		dev_data->permissive = 1;
+		/* Let user know that what they're doing could be unsafe */
+		dev_warn(&psdev->dev->dev, "enabling permissive mode "
+			 "configuration space accesses!\n");
+		dev_warn(&psdev->dev->dev,
+			 "permissive mode is potentially unsafe!\n");
+	}
+release:
+	pcistub_device_put(psdev);
+out:
+	if (!err)
+		err = count;
+	return err;
+}
+
+static ssize_t permissive_show(struct device_driver *drv, char *buf)
+{
+	struct pcistub_device *psdev;
+	struct pciback_dev_data *dev_data;
+	size_t count = 0;
+	unsigned long flags;
+	spin_lock_irqsave(&pcistub_devices_lock, flags);
+	list_for_each_entry(psdev, &pcistub_devices, dev_list) {
+		if (count >= PAGE_SIZE)
+			break;
+		if (!psdev->dev)
+			continue;
+		dev_data = pci_get_drvdata(psdev->dev);
+		if (!dev_data || !dev_data->permissive)
+			continue;
+		count +=
+		    scnprintf(buf + count, PAGE_SIZE - count, "%s\n",
+			      pci_name(psdev->dev));
+	}
+	spin_unlock_irqrestore(&pcistub_devices_lock, flags);
+	return count;
+}
+
+DRIVER_ATTR(permissive, S_IRUSR | S_IWUSR, permissive_show, permissive_add);
+
+static void pcistub_exit(void)
+{
+	driver_remove_file(&pciback_pci_driver.driver, &driver_attr_new_slot);
+	driver_remove_file(&pciback_pci_driver.driver,
+			   &driver_attr_remove_slot);
+	driver_remove_file(&pciback_pci_driver.driver, &driver_attr_slots);
+	driver_remove_file(&pciback_pci_driver.driver, &driver_attr_quirks);
+	driver_remove_file(&pciback_pci_driver.driver, &driver_attr_permissive);
+	driver_remove_file(&pciback_pci_driver.driver,
+			   &driver_attr_irq_handlers);
+	driver_remove_file(&pciback_pci_driver.driver,
+			   &driver_attr_irq_handler_state);
+	pci_unregister_driver(&pciback_pci_driver);
+}
+
+static int __init pcistub_init(void)
+{
+	int pos = 0;
+	int err = 0;
+	int domain, bus, slot, func;
+	int parsed;
+
+	if (pci_devs_to_hide && *pci_devs_to_hide) {
+		do {
+			parsed = 0;
+
+			err = sscanf(pci_devs_to_hide + pos,
+				     " (%x:%x:%x.%x) %n",
+				     &domain, &bus, &slot, &func, &parsed);
+			if (err != 4) {
+				domain = 0;
+				err = sscanf(pci_devs_to_hide + pos,
+					     " (%x:%x.%x) %n",
+					     &bus, &slot, &func, &parsed);
+				if (err != 3)
+					goto parse_error;
+			}
+
+			err = pcistub_device_id_add(domain, bus, slot, func);
+			if (err)
+				goto out;
+
+			/* if parsed<=0, we've reached the end of the string */
+			pos += parsed;
+		} while (parsed > 0 && pci_devs_to_hide[pos]);
+	}
+
+	/* If we're the first PCI Device Driver to register, we're the
+	 * first one to get offered PCI devices as they become
+	 * available (and thus we can be the first to grab them)
+	 */
+	err = pci_register_driver(&pciback_pci_driver);
+	if (err < 0)
+		goto out;
+
+	err = driver_create_file(&pciback_pci_driver.driver,
+				 &driver_attr_new_slot);
+	if (!err)
+		err = driver_create_file(&pciback_pci_driver.driver,
+					 &driver_attr_remove_slot);
+	if (!err)
+		err = driver_create_file(&pciback_pci_driver.driver,
+					 &driver_attr_slots);
+	if (!err)
+		err = driver_create_file(&pciback_pci_driver.driver,
+					 &driver_attr_quirks);
+	if (!err)
+		err = driver_create_file(&pciback_pci_driver.driver,
+					 &driver_attr_permissive);
+
+	if (!err)
+		err = driver_create_file(&pciback_pci_driver.driver,
+					 &driver_attr_irq_handlers);
+	if (!err)
+		err = driver_create_file(&pciback_pci_driver.driver,
+					 &driver_attr_irq_handler_state);
+	if (err)
+		pcistub_exit();
+
+out:
+	return err;
+
+parse_error:
+	printk(KERN_ERR "pciback: Error parsing pci_devs_to_hide at \"%s\"\n",
+	       pci_devs_to_hide + pos);
+	return -EINVAL;
+}
+
+#ifndef MODULE
+/*
+ * fs_initcall happens before device_initcall
+ * so pciback *should* get called first (b/c we
+ * want to suck up any device before other drivers
+ * get a chance by being the first pci device
+ * driver to register)
+ */
+fs_initcall(pcistub_init);
+#endif
+
+static int __init pciback_init(void)
+{
+	int err;
+
+	if (!xen_initial_domain())
+		return -ENODEV;
+
+	err = pciback_config_init();
+	if (err)
+		return err;
+
+#ifdef MODULE
+	err = pcistub_init();
+	if (err < 0)
+		return err;
+#endif
+
+	pcistub_init_devices_late();
+	err = pciback_xenbus_register();
+	if (err)
+		pcistub_exit();
+
+	return err;
+}
+
+static void __exit pciback_cleanup(void)
+{
+	pciback_xenbus_unregister();
+	pcistub_exit();
+}
+
+module_init(pciback_init);
+module_exit(pciback_cleanup);
+
+MODULE_LICENSE("Dual BSD/GPL");
diff --git a/drivers/xen/pciback/pciback.h b/drivers/xen/pciback/pciback.h
|
|
new file mode 100644
|
|
index 0000000..5c14020
|
|
--- /dev/null
|
|
+++ b/drivers/xen/pciback/pciback.h
|
|
@@ -0,0 +1,142 @@
|
|
+/*
|
|
+ * PCI Backend Common Data Structures & Function Declarations
|
|
+ *
|
|
+ * Author: Ryan Wilson <hap9@epoch.ncsc.mil>
|
|
+ */
|
|
+#ifndef __XEN_PCIBACK_H__
|
|
+#define __XEN_PCIBACK_H__
|
|
+
|
|
+#include <linux/pci.h>
|
|
+#include <linux/interrupt.h>
|
|
+#include <xen/xenbus.h>
|
|
+#include <linux/list.h>
|
|
+#include <linux/spinlock.h>
|
|
+#include <linux/workqueue.h>
|
|
+#include <linux/atomic.h>
|
|
+#include <xen/interface/io/pciif.h>
|
|
+
|
|
+struct pci_dev_entry {
|
|
+ struct list_head list;
|
|
+ struct pci_dev *dev;
|
|
+};
|
|
+
|
|
+#define _PDEVF_op_active (0)
|
|
+#define PDEVF_op_active (1<<(_PDEVF_op_active))
|
|
+#define _PCIB_op_pending (1)
|
|
+#define PCIB_op_pending (1<<(_PCIB_op_pending))
|
|
+
|
|
+struct pciback_device {
|
|
+ void *pci_dev_data;
|
|
+ spinlock_t dev_lock;
|
|
+
|
|
+ struct xenbus_device *xdev;
|
|
+
|
|
+ struct xenbus_watch be_watch;
|
|
+ u8 be_watching;
|
|
+
|
|
+ int evtchn_irq;
|
|
+
|
|
+ struct xen_pci_sharedinfo *sh_info;
|
|
+
|
|
+ unsigned long flags;
|
|
+
|
|
+ struct work_struct op_work;
|
|
+};
|
|
+
|
|
+struct pciback_dev_data {
|
|
+ struct list_head config_fields;
|
|
+ unsigned int permissive:1;
|
|
+ unsigned int warned_on_write:1;
|
|
+ unsigned int enable_intx:1;
|
|
+ unsigned int isr_on:1; /* Whether the IRQ handler is installed. */
|
|
+ unsigned int ack_intr:1; /* .. and ACK-ing */
|
|
+ unsigned long handled;
|
|
+ unsigned int irq; /* Saved in case device transitions to MSI/MSI-X */
|
|
+ char irq_name[0]; /* pciback[000:04:00.0] */
|
|
+};
|
|
+
|
|
+/* Used by XenBus and pciback_ops.c */
|
|
+extern wait_queue_head_t aer_wait_queue;
|
|
+extern struct workqueue_struct *pciback_wq;
|
|
+/* Used by pcistub.c and conf_space_quirks.c */
|
|
+extern struct list_head pciback_quirks;
|
|
+
|
|
+/* Get/Put PCI Devices that are hidden from the PCI Backend Domain */
|
|
+struct pci_dev *pcistub_get_pci_dev_by_slot(struct pciback_device *pdev,
|
|
+ int domain, int bus,
|
|
+ int slot, int func);
|
|
+struct pci_dev *pcistub_get_pci_dev(struct pciback_device *pdev,
|
|
+ struct pci_dev *dev);
|
|
+void pcistub_put_pci_dev(struct pci_dev *dev);
|
|
+
|
|
+/* Ensure a device is turned off or reset */
|
|
+void pciback_reset_device(struct pci_dev *pdev);
|
|
+
|
|
+/* Access a virtual configuration space for a PCI device */
|
|
+int pciback_config_init(void);
|
|
+int pciback_config_init_dev(struct pci_dev *dev);
|
|
+void pciback_config_free_dyn_fields(struct pci_dev *dev);
|
|
+void pciback_config_reset_dev(struct pci_dev *dev);
|
|
+void pciback_config_free_dev(struct pci_dev *dev);
|
|
+int pciback_config_read(struct pci_dev *dev, int offset, int size,
|
|
+ u32 *ret_val);
|
|
+int pciback_config_write(struct pci_dev *dev, int offset, int size, u32 value);
|
|
+
|
|
+/* Handle requests for specific devices from the frontend */
|
|
+typedef int (*publish_pci_dev_cb) (struct pciback_device *pdev,
|
|
+ unsigned int domain, unsigned int bus,
|
|
+ unsigned int devfn, unsigned int devid);
|
|
+typedef int (*publish_pci_root_cb) (struct pciback_device *pdev,
|
|
+ unsigned int domain, unsigned int bus);
|
|
+int pciback_add_pci_dev(struct pciback_device *pdev, struct pci_dev *dev,
|
|
+ int devid, publish_pci_dev_cb publish_cb);
|
|
+void pciback_release_pci_dev(struct pciback_device *pdev, struct pci_dev *dev);
|
|
+struct pci_dev *pciback_get_pci_dev(struct pciback_device *pdev,
|
|
+ unsigned int domain, unsigned int bus,
|
|
+ unsigned int devfn);
|
|
+
|
|
+/**
|
|
+* Added for domain0 PCIE-AER handling. Get the guest domain/bus/devfn in
+* pciback before sending an aer request to pcifront, so that the guest can
+* identify the device and cooperate with pciback to finish the aer recovery
+* job if the device driver has the capability.
+*/
+
+int pciback_get_pcifront_dev(struct pci_dev *pcidev,
+ struct pciback_device *pdev,
+ unsigned int *domain, unsigned int *bus,
+ unsigned int *devfn);
+int pciback_init_devices(struct pciback_device *pdev);
+int pciback_publish_pci_roots(struct pciback_device *pdev,
+ publish_pci_root_cb cb);
+void pciback_release_devices(struct pciback_device *pdev);
+
+/* Handles events from front-end */
+irqreturn_t pciback_handle_event(int irq, void *dev_id);
+void pciback_do_op(struct work_struct *data);
+
+int pciback_xenbus_register(void);
+void pciback_xenbus_unregister(void);
+
+#ifdef CONFIG_PCI_MSI
+int pciback_enable_msi(struct pciback_device *pdev,
+ struct pci_dev *dev, struct xen_pci_op *op);
+
+int pciback_disable_msi(struct pciback_device *pdev,
+ struct pci_dev *dev, struct xen_pci_op *op);
+
+
+int pciback_enable_msix(struct pciback_device *pdev,
+ struct pci_dev *dev, struct xen_pci_op *op);
+
+int pciback_disable_msix(struct pciback_device *pdev,
+ struct pci_dev *dev, struct xen_pci_op *op);
+#endif
+extern int verbose_request;
+
+void test_and_schedule_op(struct pciback_device *pdev);
+#endif
+
+/* Handles shared IRQs that can go to both the device domain and the control
+ * domain. */
+void pciback_irq_handler(struct pci_dev *dev, int reset);
+irqreturn_t pciback_guest_interrupt(int irq, void *dev_id);
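[Editor's aside] The `irq_name[0]` member at the end of `struct pciback_dev_data` is a zero-length (flexible) array, so the name storage is allocated together with the struct in a single allocation. A hedged sketch of how such an allocation typically looks; the `demo_` names below are illustrative and not taken from this patch:

```c
#include <linux/slab.h>
#include <linux/string.h>

struct demo_dev_data {		/* mirrors the irq_name[0] idiom above */
	unsigned int irq;
	char irq_name[0];	/* storage lives just past the struct */
};

/* One kzalloc covers the struct plus the formatted name string. */
static struct demo_dev_data *demo_alloc(const char *pci_name)
{
	struct demo_dev_data *d;
	size_t len = sizeof("pciback[]") + strlen(pci_name);

	d = kzalloc(sizeof(*d) + len, GFP_KERNEL);
	if (!d)
		return NULL;
	snprintf(d->irq_name, len, "pciback[%s]", pci_name);
	return d;
}
```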
diff --git a/drivers/xen/pciback/pciback_ops.c b/drivers/xen/pciback/pciback_ops.c
new file mode 100644
index 0000000..28a2a55
--- /dev/null
+++ b/drivers/xen/pciback/pciback_ops.c
@@ -0,0 +1,248 @@
+/*
+ * PCI Backend Operations - respond to PCI requests from Frontend
+ *
+ * Author: Ryan Wilson <hap9@epoch.ncsc.mil>
+ */
+#include <linux/module.h>
+#include <linux/wait.h>
+#include <linux/bitops.h>
+#include <xen/events.h>
+#include <linux/sched.h>
+#include "pciback.h"
+
+int verbose_request;
+module_param(verbose_request, int, 0644);
+
+/* Ensure a device has the fake IRQ handler "turned on/off" and is
+ * ready to be exported. This MUST be run after pciback_reset_device,
+ * which does the actual PCI device enable/disable.
+ */
+void pciback_control_isr(struct pci_dev *dev, int reset)
+{
+ struct pciback_dev_data *dev_data;
+ int rc;
+ int enable = 0;
+
+ dev_data = pci_get_drvdata(dev);
+ if (!dev_data)
+ return;
+
+ /* We don't deal with bridges */
+ if (dev->hdr_type != PCI_HEADER_TYPE_NORMAL)
+ return;
+
+ if (reset) {
+ dev_data->enable_intx = 0;
+ dev_data->ack_intr = 0;
+ }
+ enable = dev_data->enable_intx;
+
+ /* Asked to disable, but the ISR isn't running */
+ if (!enable && !dev_data->isr_on)
+ return;
+
+ /* Squirrel away the IRQs in the dev_data. We need this
+ * b/c when device transitions to MSI, the dev->irq is
+ * overwritten with the MSI vector.
+ */
+ if (enable)
+ dev_data->irq = dev->irq;
+
+ /*
+ * SR-IOV devices in general use MSI-X and have no legacy
+ * interrupts, so inhibit creating a fake IRQ handler for them.
+ */
+ if (dev_data->irq == 0)
+ goto out;
+
+ dev_dbg(&dev->dev, "%s: #%d %s %s%s %s-> %s\n",
+ dev_data->irq_name,
+ dev_data->irq,
+ pci_is_enabled(dev) ? "on" : "off",
+ dev->msi_enabled ? "MSI" : "",
+ dev->msix_enabled ? "MSI/X" : "",
+ dev_data->isr_on ? "enable" : "disable",
+ enable ? "enable" : "disable");
+
+ if (enable) {
+ rc = request_irq(dev_data->irq,
+ pciback_guest_interrupt, IRQF_SHARED,
+ dev_data->irq_name, dev);
+ if (rc) {
+ dev_err(&dev->dev, "%s: failed to install fake IRQ " \
+ "handler for IRQ %d! (rc:%d)\n",
+ dev_data->irq_name, dev_data->irq, rc);
+ goto out;
+ }
+ } else {
+ free_irq(dev_data->irq, dev);
+ dev_data->irq = 0;
+ }
+ dev_data->isr_on = enable;
+ dev_data->ack_intr = enable;
+out:
+ dev_dbg(&dev->dev, "%s: #%d %s %s%s %s\n",
+ dev_data->irq_name,
+ dev_data->irq,
+ pci_is_enabled(dev) ? "on" : "off",
+ dev->msi_enabled ? "MSI" : "",
+ dev->msix_enabled ? "MSI/X" : "",
+ enable ? (dev_data->isr_on ? "enabled" : "failed to enable") :
+ (dev_data->isr_on ? "failed to disable" : "disabled"));
+}
+
+/* Ensure a device is "turned off" and ready to be exported.
+ * (Also see pciback_config_reset to ensure virtual configuration space is
+ * ready to be re-exported)
+ */
+void pciback_reset_device(struct pci_dev *dev)
+{
+ u16 cmd;
+
+ pciback_control_isr(dev, 1 /* reset device */);
+
+ /* Disable devices (but not bridges) */
+ if (dev->hdr_type == PCI_HEADER_TYPE_NORMAL) {
+#ifdef CONFIG_PCI_MSI
+ /* The guest could have been abruptly killed without
+ * disabling MSI/MSI-X interrupts.*/
+ if (dev->msix_enabled)
+ pci_disable_msix(dev);
+ if (dev->msi_enabled)
+ pci_disable_msi(dev);
+#endif
+ pci_disable_device(dev);
+
+ pci_write_config_word(dev, PCI_COMMAND, 0);
+
+ dev->is_busmaster = 0;
+ } else {
+ pci_read_config_word(dev, PCI_COMMAND, &cmd);
+ if (cmd & (PCI_COMMAND_INVALIDATE)) {
+ cmd &= ~(PCI_COMMAND_INVALIDATE);
+ pci_write_config_word(dev, PCI_COMMAND, cmd);
+
+ dev->is_busmaster = 0;
+ }
+ }
+}
+/*
+* Now the same evtchn is used for both pcifront conf_read_write requests
+* and pcie aer front end acks. We use a new work_queue to schedule the
+* pciback conf_read_write service, to avoid conflicts with the aer_core
+* do_recovery job, which also uses the system default work_queue.
+*/
+void test_and_schedule_op(struct pciback_device *pdev)
+{
+ /* Check that frontend is requesting an operation and that we are not
+ * already processing a request */
+ if (test_bit(_XEN_PCIF_active, (unsigned long *)&pdev->sh_info->flags)
+ && !test_and_set_bit(_PDEVF_op_active, &pdev->flags)) {
+ queue_work(pciback_wq, &pdev->op_work);
+ }
+ /*_XEN_PCIB_active should have been cleared by pcifront. And also make
+ sure pciback is waiting for ack by checking _PCIB_op_pending*/
+ if (!test_bit(_XEN_PCIB_active, (unsigned long *)&pdev->sh_info->flags)
+ && test_bit(_PCIB_op_pending, &pdev->flags)) {
+ wake_up(&aer_wait_queue);
+ }
+}
+
+/* Performing the configuration space reads/writes must not be done in atomic
+ * context because some of the pci_* functions can sleep (mostly due to ACPI
+ * use of semaphores). This function is intended to be called from a work
+ * queue in process context taking a struct pciback_device as a parameter */
+
+void pciback_do_op(struct work_struct *data)
+{
+ struct pciback_device *pdev =
+ container_of(data, struct pciback_device, op_work);
+ struct pci_dev *dev;
+ struct pciback_dev_data *dev_data = NULL;
+ struct xen_pci_op *op = &pdev->sh_info->op;
+ int test_intx = 0;
+
+ dev = pciback_get_pci_dev(pdev, op->domain, op->bus, op->devfn);
+
+ if (dev == NULL)
+ op->err = XEN_PCI_ERR_dev_not_found;
+ else {
+ dev_data = pci_get_drvdata(dev);
+ if (dev_data)
+ test_intx = dev_data->enable_intx;
+ switch (op->cmd) {
+ case XEN_PCI_OP_conf_read:
+ op->err = pciback_config_read(dev,
+ op->offset, op->size, &op->value);
+ break;
+ case XEN_PCI_OP_conf_write:
+ op->err = pciback_config_write(dev,
+ op->offset, op->size, op->value);
+ break;
+#ifdef CONFIG_PCI_MSI
+ case XEN_PCI_OP_enable_msi:
+ op->err = pciback_enable_msi(pdev, dev, op);
+ break;
+ case XEN_PCI_OP_disable_msi:
+ op->err = pciback_disable_msi(pdev, dev, op);
+ break;
+ case XEN_PCI_OP_enable_msix:
+ op->err = pciback_enable_msix(pdev, dev, op);
+ break;
+ case XEN_PCI_OP_disable_msix:
+ op->err = pciback_disable_msix(pdev, dev, op);
+ break;
+#endif
+ default:
+ op->err = XEN_PCI_ERR_not_implemented;
+ break;
+ }
+ }
+ if (!op->err && dev && dev_data) {
+ /* Transition detected */
+ if ((dev_data->enable_intx != test_intx))
+ pciback_control_isr(dev, 0 /* no reset */);
+ }
+ /* Tell the driver domain that we're done. */
+ wmb();
+ clear_bit(_XEN_PCIF_active, (unsigned long *)&pdev->sh_info->flags);
+ notify_remote_via_irq(pdev->evtchn_irq);
+
+ /* Mark that we're done. */
+ smp_mb__before_clear_bit(); /* /after/ clearing PCIF_active */
+ clear_bit(_PDEVF_op_active, &pdev->flags);
+ smp_mb__after_clear_bit(); /* /before/ final check for work */
+
+ /* Check to see if the driver domain tried to start another request in
+ * between clearing _XEN_PCIF_active and clearing _PDEVF_op_active.
+ */
+ test_and_schedule_op(pdev);
+}
+
+irqreturn_t pciback_handle_event(int irq, void *dev_id)
+{
+ struct pciback_device *pdev = dev_id;
+
+ test_and_schedule_op(pdev);
+
+ return IRQ_HANDLED;
+}
+irqreturn_t pciback_guest_interrupt(int irq, void *dev_id)
+{
+ struct pci_dev *dev = (struct pci_dev *)dev_id;
+ struct pciback_dev_data *dev_data = pci_get_drvdata(dev);
+
+ if (dev_data->isr_on && dev_data->ack_intr) {
+ dev_data->handled++;
+ if ((dev_data->handled % 1000) == 0) {
+ if (xen_test_irq_shared(irq)) {
+ printk(KERN_INFO "%s IRQ line is not shared "
+ "with other domains. Turning ISR off\n",
+ dev_data->irq_name);
+ dev_data->ack_intr = 0;
+ }
+ }
+ return IRQ_HANDLED;
+ }
+ return IRQ_NONE;
+}
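[Editor's aside] The `_PDEVF_op_active` handshake above is a common "single-flight" pattern: the event handler may fire many times, but at most one work item is ever in flight, and the re-check after clearing the flag closes the race with a late request. A condensed, self-contained sketch of the same pattern; the `my_` names are illustrative, not from this patch:

```c
#include <linux/bitops.h>
#include <linux/workqueue.h>

#define MY_OP_ACTIVE 0	/* illustrative flag bit */

struct my_dev {
	unsigned long flags;
	struct work_struct work;
	int request_pending;	/* stands in for the shared-ring flag */
};

/* Schedule work only if a request is pending and none is in flight. */
static void my_test_and_schedule(struct my_dev *d)
{
	if (d->request_pending &&
	    !test_and_set_bit(MY_OP_ACTIVE, &d->flags))
		schedule_work(&d->work);
}

static void my_do_op(struct work_struct *w)
{
	struct my_dev *d = container_of(w, struct my_dev, work);

	/* ... process the pending request here ... */

	clear_bit(MY_OP_ACTIVE, &d->flags);
	smp_mb__after_clear_bit();	/* order the clear before the re-check */
	my_test_and_schedule(d);	/* close the race with a late request */
}
```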
diff --git a/drivers/xen/pciback/slot.c b/drivers/xen/pciback/slot.c
new file mode 100644
index 0000000..efb922d
--- /dev/null
+++ b/drivers/xen/pciback/slot.c
@@ -0,0 +1,191 @@
+/*
+ * PCI Backend - Provides a Virtual PCI bus (with real devices)
+ * to the frontend
+ *
+ * Author: Ryan Wilson <hap9@epoch.ncsc.mil> (vpci.c)
+ * Author: Tristan Gingold <tristan.gingold@bull.net>, from vpci.c
+ */
+
+#include <linux/list.h>
+#include <linux/slab.h>
+#include <linux/pci.h>
+#include <linux/spinlock.h>
+#include "pciback.h"
+
+/* There are at most 32 slots in a pci bus. */
+#define PCI_SLOT_MAX 32
+
+#define PCI_BUS_NBR 2
+
+struct slot_dev_data {
+ /* Access to dev_list must be protected by lock */
+ struct pci_dev *slots[PCI_BUS_NBR][PCI_SLOT_MAX];
+ spinlock_t lock;
+};
+
+struct pci_dev *pciback_get_pci_dev(struct pciback_device *pdev,
+ unsigned int domain, unsigned int bus,
+ unsigned int devfn)
+{
+ struct pci_dev *dev = NULL;
+ struct slot_dev_data *slot_dev = pdev->pci_dev_data;
+ unsigned long flags;
+
+ if (domain != 0 || PCI_FUNC(devfn) != 0)
+ return NULL;
+
+ if (PCI_SLOT(devfn) >= PCI_SLOT_MAX || bus >= PCI_BUS_NBR)
+ return NULL;
+
+ spin_lock_irqsave(&slot_dev->lock, flags);
+ dev = slot_dev->slots[bus][PCI_SLOT(devfn)];
+ spin_unlock_irqrestore(&slot_dev->lock, flags);
+
+ return dev;
+}
+
+int pciback_add_pci_dev(struct pciback_device *pdev, struct pci_dev *dev,
+ int devid, publish_pci_dev_cb publish_cb)
+{
+ int err = 0, slot, bus;
+ struct slot_dev_data *slot_dev = pdev->pci_dev_data;
+ unsigned long flags;
+
+ if ((dev->class >> 24) == PCI_BASE_CLASS_BRIDGE) {
+ err = -EFAULT;
+ xenbus_dev_fatal(pdev->xdev, err,
+ "Can't export bridges on the virtual PCI bus");
+ goto out;
+ }
+
+ spin_lock_irqsave(&slot_dev->lock, flags);
+
+ /* Assign to a new slot on the virtual PCI bus */
+ for (bus = 0; bus < PCI_BUS_NBR; bus++)
+ for (slot = 0; slot < PCI_SLOT_MAX; slot++) {
+ if (slot_dev->slots[bus][slot] == NULL) {
+ printk(KERN_INFO
+ "pciback: slot: %s: assign to virtual "
+ "slot %d, bus %d\n",
+ pci_name(dev), slot, bus);
+ slot_dev->slots[bus][slot] = dev;
+ goto unlock;
+ }
+ }
+
+ err = -ENOMEM;
+ xenbus_dev_fatal(pdev->xdev, err,
+ "No more space on root virtual PCI bus");
+
+unlock:
+ spin_unlock_irqrestore(&slot_dev->lock, flags);
+
+ /* Publish this device. */
+ if (!err)
+ err = publish_cb(pdev, 0, 0, PCI_DEVFN(slot, 0), devid);
+
+out:
+ return err;
+}
+
+void pciback_release_pci_dev(struct pciback_device *pdev, struct pci_dev *dev)
+{
+ int slot, bus;
+ struct slot_dev_data *slot_dev = pdev->pci_dev_data;
+ struct pci_dev *found_dev = NULL;
+ unsigned long flags;
+
+ spin_lock_irqsave(&slot_dev->lock, flags);
+
+ for (bus = 0; bus < PCI_BUS_NBR; bus++)
+ for (slot = 0; slot < PCI_SLOT_MAX; slot++) {
+ if (slot_dev->slots[bus][slot] == dev) {
+ slot_dev->slots[bus][slot] = NULL;
+ found_dev = dev;
+ goto out;
+ }
+ }
+
+out:
+ spin_unlock_irqrestore(&slot_dev->lock, flags);
+
+ if (found_dev)
+ pcistub_put_pci_dev(found_dev);
+}
+
+int pciback_init_devices(struct pciback_device *pdev)
+{
+ int slot, bus;
+ struct slot_dev_data *slot_dev;
+
+ slot_dev = kmalloc(sizeof(*slot_dev), GFP_KERNEL);
+ if (!slot_dev)
+ return -ENOMEM;
+
+ spin_lock_init(&slot_dev->lock);
+
+ for (bus = 0; bus < PCI_BUS_NBR; bus++)
+ for (slot = 0; slot < PCI_SLOT_MAX; slot++)
+ slot_dev->slots[bus][slot] = NULL;
+
+ pdev->pci_dev_data = slot_dev;
+
+ return 0;
+}
+
+int pciback_publish_pci_roots(struct pciback_device *pdev,
+ publish_pci_root_cb publish_cb)
+{
+ /* The Virtual PCI bus has only one root */
+ return publish_cb(pdev, 0, 0);
+}
+
+void pciback_release_devices(struct pciback_device *pdev)
+{
+ int slot, bus;
+ struct slot_dev_data *slot_dev = pdev->pci_dev_data;
+ struct pci_dev *dev;
+
+ for (bus = 0; bus < PCI_BUS_NBR; bus++)
+ for (slot = 0; slot < PCI_SLOT_MAX; slot++) {
+ dev = slot_dev->slots[bus][slot];
+ if (dev != NULL)
+ pcistub_put_pci_dev(dev);
+ }
+
+ kfree(slot_dev);
+ pdev->pci_dev_data = NULL;
+}
+
+int pciback_get_pcifront_dev(struct pci_dev *pcidev,
+ struct pciback_device *pdev,
+ unsigned int *domain, unsigned int *bus,
+ unsigned int *devfn)
+{
+ int slot, busnr;
+ struct slot_dev_data *slot_dev = pdev->pci_dev_data;
+ struct pci_dev *dev;
+ int found = 0;
+ unsigned long flags;
+
+ spin_lock_irqsave(&slot_dev->lock, flags);
+
+ for (busnr = 0; busnr < PCI_BUS_NBR; busnr++)
+ for (slot = 0; slot < PCI_SLOT_MAX; slot++) {
+ dev = slot_dev->slots[busnr][slot];
+ if (dev && dev->bus->number == pcidev->bus->number
+ && dev->devfn == pcidev->devfn
+ && pci_domain_nr(dev->bus) ==
+ pci_domain_nr(pcidev->bus)) {
+ found = 1;
+ *domain = 0;
+ *bus = busnr;
+ *devfn = PCI_DEVFN(slot, 0);
+ goto out;
+ }
+ }
+out:
+ spin_unlock_irqrestore(&slot_dev->lock, flags);
+ return found;
+
+}
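[Editor's aside] Note the loop increment above was corrected from `bus++` to `busnr++`: `bus` is the output pointer argument, so incrementing it would walk the pointer rather than the bus index. Both slot.c and vpci.c lean on the kernel's devfn encoding, where slot and function are packed into one byte. A small standalone sketch of the arithmetic these macros perform (definitions as in <linux/pci.h>):

```c
#include <stdio.h>

/* Same definitions as in <linux/pci.h>: slot in bits 7:3, func in bits 2:0. */
#define PCI_DEVFN(slot, func)	((((slot) & 0x1f) << 3) | ((func) & 0x07))
#define PCI_SLOT(devfn)		(((devfn) >> 3) & 0x1f)
#define PCI_FUNC(devfn)		((devfn) & 0x07)

int main(void)
{
	unsigned int devfn = PCI_DEVFN(4, 0);	/* 04:00.0 -> slot 4, func 0 */

	printf("devfn=0x%02x slot=%u func=%u\n",
	       devfn, PCI_SLOT(devfn), PCI_FUNC(devfn));
	return 0;
}
```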
diff --git a/drivers/xen/pciback/vpci.c b/drivers/xen/pciback/vpci.c
new file mode 100644
index 0000000..2857ab8
--- /dev/null
+++ b/drivers/xen/pciback/vpci.c
@@ -0,0 +1,244 @@
+/*
+ * PCI Backend - Provides a Virtual PCI bus (with real devices)
+ * to the frontend
+ *
+ * Author: Ryan Wilson <hap9@epoch.ncsc.mil>
+ */
+
+#include <linux/list.h>
+#include <linux/slab.h>
+#include <linux/pci.h>
+#include <linux/spinlock.h>
+#include "pciback.h"
+
+#define PCI_SLOT_MAX 32
+
+struct vpci_dev_data {
+ /* Access to dev_list must be protected by lock */
+ struct list_head dev_list[PCI_SLOT_MAX];
+ spinlock_t lock;
+};
+
+static inline struct list_head *list_first(struct list_head *head)
+{
+ return head->next;
+}
+
+struct pci_dev *pciback_get_pci_dev(struct pciback_device *pdev,
+ unsigned int domain, unsigned int bus,
+ unsigned int devfn)
+{
+ struct pci_dev_entry *entry;
+ struct pci_dev *dev = NULL;
+ struct vpci_dev_data *vpci_dev = pdev->pci_dev_data;
+ unsigned long flags;
+
+ if (domain != 0 || bus != 0)
+ return NULL;
+
+ if (PCI_SLOT(devfn) < PCI_SLOT_MAX) {
+ spin_lock_irqsave(&vpci_dev->lock, flags);
+
+ list_for_each_entry(entry,
+ &vpci_dev->dev_list[PCI_SLOT(devfn)],
+ list) {
+ if (PCI_FUNC(entry->dev->devfn) == PCI_FUNC(devfn)) {
+ dev = entry->dev;
+ break;
+ }
+ }
+
+ spin_unlock_irqrestore(&vpci_dev->lock, flags);
+ }
+ return dev;
+}
+
+static inline int match_slot(struct pci_dev *l, struct pci_dev *r)
+{
+ if (pci_domain_nr(l->bus) == pci_domain_nr(r->bus)
+ && l->bus == r->bus && PCI_SLOT(l->devfn) == PCI_SLOT(r->devfn))
+ return 1;
+
+ return 0;
+}
+
+int pciback_add_pci_dev(struct pciback_device *pdev, struct pci_dev *dev,
+ int devid, publish_pci_dev_cb publish_cb)
+{
+ int err = 0, slot, func = -1;
+ struct pci_dev_entry *t, *dev_entry;
+ struct vpci_dev_data *vpci_dev = pdev->pci_dev_data;
+ unsigned long flags;
+
+ if ((dev->class >> 24) == PCI_BASE_CLASS_BRIDGE) {
+ err = -EFAULT;
+ xenbus_dev_fatal(pdev->xdev, err,
+ "Can't export bridges on the virtual PCI bus");
+ goto out;
+ }
+
+ dev_entry = kmalloc(sizeof(*dev_entry), GFP_KERNEL);
+ if (!dev_entry) {
+ err = -ENOMEM;
+ xenbus_dev_fatal(pdev->xdev, err,
+ "Error adding entry to virtual PCI bus");
+ goto out;
+ }
+
+ dev_entry->dev = dev;
+
+ spin_lock_irqsave(&vpci_dev->lock, flags);
+
+ /* Keep multi-function devices together on the virtual PCI bus */
+ for (slot = 0; slot < PCI_SLOT_MAX; slot++) {
+ if (!list_empty(&vpci_dev->dev_list[slot])) {
+ t = list_entry(list_first(&vpci_dev->dev_list[slot]),
+ struct pci_dev_entry, list);
+
+ if (match_slot(dev, t->dev)) {
+ pr_info("pciback: vpci: %s: "
+ "assign to virtual slot %d func %d\n",
+ pci_name(dev), slot,
+ PCI_FUNC(dev->devfn));
+ list_add_tail(&dev_entry->list,
+ &vpci_dev->dev_list[slot]);
+ func = PCI_FUNC(dev->devfn);
+ goto unlock;
+ }
+ }
+ }
+
+ /* Assign to a new slot on the virtual PCI bus */
+ for (slot = 0; slot < PCI_SLOT_MAX; slot++) {
+ if (list_empty(&vpci_dev->dev_list[slot])) {
+ printk(KERN_INFO
+ "pciback: vpci: %s: assign to virtual slot %d\n",
+ pci_name(dev), slot);
+ list_add_tail(&dev_entry->list,
+ &vpci_dev->dev_list[slot]);
+ func = PCI_FUNC(dev->devfn);
+ goto unlock;
+ }
+ }
+
+ err = -ENOMEM;
+ xenbus_dev_fatal(pdev->xdev, err,
+ "No more space on root virtual PCI bus");
+
+unlock:
+ spin_unlock_irqrestore(&vpci_dev->lock, flags);
+
+ /* Publish this device. */
+ if (!err)
+ err = publish_cb(pdev, 0, 0, PCI_DEVFN(slot, func), devid);
+
+out:
+ return err;
+}
+
+void pciback_release_pci_dev(struct pciback_device *pdev, struct pci_dev *dev)
+{
+ int slot;
+ struct vpci_dev_data *vpci_dev = pdev->pci_dev_data;
+ struct pci_dev *found_dev = NULL;
+ unsigned long flags;
+
+ spin_lock_irqsave(&vpci_dev->lock, flags);
+
+ for (slot = 0; slot < PCI_SLOT_MAX; slot++) {
+ struct pci_dev_entry *e, *tmp;
+ list_for_each_entry_safe(e, tmp, &vpci_dev->dev_list[slot],
+ list) {
+ if (e->dev == dev) {
+ list_del(&e->list);
+ found_dev = e->dev;
+ kfree(e);
+ goto out;
+ }
+ }
+ }
+
+out:
+ spin_unlock_irqrestore(&vpci_dev->lock, flags);
+
+ if (found_dev)
+ pcistub_put_pci_dev(found_dev);
+}
+
+int pciback_init_devices(struct pciback_device *pdev)
+{
+ int slot;
+ struct vpci_dev_data *vpci_dev;
+
+ vpci_dev = kmalloc(sizeof(*vpci_dev), GFP_KERNEL);
+ if (!vpci_dev)
+ return -ENOMEM;
+
+ spin_lock_init(&vpci_dev->lock);
+
+ for (slot = 0; slot < PCI_SLOT_MAX; slot++)
+ INIT_LIST_HEAD(&vpci_dev->dev_list[slot]);
+
+ pdev->pci_dev_data = vpci_dev;
+
+ return 0;
+}
+
+int pciback_publish_pci_roots(struct pciback_device *pdev,
+ publish_pci_root_cb publish_cb)
+{
+ /* The Virtual PCI bus has only one root */
+ return publish_cb(pdev, 0, 0);
+}
+
+void pciback_release_devices(struct pciback_device *pdev)
+{
+ int slot;
+ struct vpci_dev_data *vpci_dev = pdev->pci_dev_data;
+
+ for (slot = 0; slot < PCI_SLOT_MAX; slot++) {
+ struct pci_dev_entry *e, *tmp;
+ list_for_each_entry_safe(e, tmp, &vpci_dev->dev_list[slot],
+ list) {
+ list_del(&e->list);
+ pcistub_put_pci_dev(e->dev);
+ kfree(e);
+ }
+ }
+
+ kfree(vpci_dev);
+ pdev->pci_dev_data = NULL;
+}
+
+int pciback_get_pcifront_dev(struct pci_dev *pcidev,
+ struct pciback_device *pdev,
+ unsigned int *domain, unsigned int *bus,
+ unsigned int *devfn)
+{
+ struct pci_dev_entry *entry;
+ struct pci_dev *dev = NULL;
+ struct vpci_dev_data *vpci_dev = pdev->pci_dev_data;
+ unsigned long flags;
+ int found = 0, slot;
+
+ spin_lock_irqsave(&vpci_dev->lock, flags);
+ for (slot = 0; slot < PCI_SLOT_MAX; slot++) {
+ list_for_each_entry(entry,
+ &vpci_dev->dev_list[slot],
+ list) {
+ dev = entry->dev;
+ if (dev && dev->bus->number == pcidev->bus->number
+ && pci_domain_nr(dev->bus) ==
+ pci_domain_nr(pcidev->bus)
+ && dev->devfn == pcidev->devfn) {
+ found = 1;
+ *domain = 0;
+ *bus = 0;
+ *devfn = PCI_DEVFN(slot,
+ PCI_FUNC(pcidev->devfn));
+ }
+ }
+ }
+ spin_unlock_irqrestore(&vpci_dev->lock, flags);
+ return found;
+}
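[Editor's aside] vpci.c's release paths use list_for_each_entry_safe because entries are freed while the list is being walked. A minimal sketch of why the _safe variant is needed; the `struct item` type is illustrative, not from this patch:

```c
#include <linux/list.h>
#include <linux/slab.h>

struct item {
	struct list_head list;
	int val;
};

/* Freeing while iterating: the _safe variant caches the next pointer
 * (tmp) before the current entry is deleted and freed, so the walk
 * survives the kfree(). Plain list_for_each_entry would read freed
 * memory here. */
static void drain(struct list_head *head)
{
	struct item *e, *tmp;

	list_for_each_entry_safe(e, tmp, head, list) {
		list_del(&e->list);
		kfree(e);
	}
}
```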
diff --git a/drivers/xen/pciback/xenbus.c b/drivers/xen/pciback/xenbus.c
new file mode 100644
index 0000000..70030c4
--- /dev/null
+++ b/drivers/xen/pciback/xenbus.c
@@ -0,0 +1,726 @@
+/*
+ * PCI Backend Xenbus Setup - handles setup with frontend and xend
+ *
+ * Author: Ryan Wilson <hap9@epoch.ncsc.mil>
+ */
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/list.h>
+#include <linux/vmalloc.h>
+#include <linux/workqueue.h>
+#include <xen/xenbus.h>
+#include <xen/events.h>
+#include <asm/xen/pci.h>
+#include <linux/workqueue.h>
+#include "pciback.h"
+
+#define INVALID_EVTCHN_IRQ (-1)
+struct workqueue_struct *pciback_wq;
+
+static struct pciback_device *alloc_pdev(struct xenbus_device *xdev)
+{
+ struct pciback_device *pdev;
+
+ pdev = kzalloc(sizeof(struct pciback_device), GFP_KERNEL);
+ if (pdev == NULL)
+ goto out;
+ dev_dbg(&xdev->dev, "allocated pdev @ 0x%p\n", pdev);
+
+ pdev->xdev = xdev;
+ dev_set_drvdata(&xdev->dev, pdev);
+
+ spin_lock_init(&pdev->dev_lock);
+
+ pdev->sh_info = NULL;
+ pdev->evtchn_irq = INVALID_EVTCHN_IRQ;
+ pdev->be_watching = 0;
+
+ INIT_WORK(&pdev->op_work, pciback_do_op);
+
+ if (pciback_init_devices(pdev)) {
+ kfree(pdev);
+ pdev = NULL;
+ }
+out:
+ return pdev;
+}
+
+static void pciback_disconnect(struct pciback_device *pdev)
+{
+ spin_lock(&pdev->dev_lock);
+
+ /* Ensure the guest can't trigger our handler before removing devices */
+ if (pdev->evtchn_irq != INVALID_EVTCHN_IRQ) {
+ unbind_from_irqhandler(pdev->evtchn_irq, pdev);
+ pdev->evtchn_irq = INVALID_EVTCHN_IRQ;
+ }
+ spin_unlock(&pdev->dev_lock);
+
+ /* If the driver domain started an op, make sure we complete it
+ * before releasing the shared memory */
+
+ /* Note, the workqueue does not use spinlocks at all.*/
+ flush_workqueue(pciback_wq);
+
+ spin_lock(&pdev->dev_lock);
+ if (pdev->sh_info != NULL) {
+ xenbus_unmap_ring_vfree(pdev->xdev, pdev->sh_info);
+ pdev->sh_info = NULL;
+ }
+ spin_unlock(&pdev->dev_lock);
+
+}
+
+static void free_pdev(struct pciback_device *pdev)
+{
+ if (pdev->be_watching) {
+ unregister_xenbus_watch(&pdev->be_watch);
+ pdev->be_watching = 0;
+ }
+
+ pciback_disconnect(pdev);
+
+ pciback_release_devices(pdev);
+
+ dev_set_drvdata(&pdev->xdev->dev, NULL);
+ pdev->xdev = NULL;
+
+ kfree(pdev);
+}
+
+static int pciback_do_attach(struct pciback_device *pdev, int gnt_ref,
+ int remote_evtchn)
+{
+ int err = 0;
+ void *vaddr;
+
+ dev_dbg(&pdev->xdev->dev,
+ "Attaching to frontend resources - gnt_ref=%d evtchn=%d\n",
+ gnt_ref, remote_evtchn);
+
+ err = xenbus_map_ring_valloc(pdev->xdev, gnt_ref, &vaddr);
+ if (err < 0) {
+ xenbus_dev_fatal(pdev->xdev, err,
+ "Error mapping other domain page in ours.");
+ goto out;
+ }
+
+ spin_lock(&pdev->dev_lock);
+ pdev->sh_info = vaddr;
+ spin_unlock(&pdev->dev_lock);
+
+ err = bind_interdomain_evtchn_to_irqhandler(
+ pdev->xdev->otherend_id, remote_evtchn, pciback_handle_event,
+ 0, "pciback", pdev);
+ if (err < 0) {
+ xenbus_dev_fatal(pdev->xdev, err,
+ "Error binding event channel to IRQ");
+ goto out;
+ }
+
+ spin_lock(&pdev->dev_lock);
+ pdev->evtchn_irq = err;
+ spin_unlock(&pdev->dev_lock);
+ err = 0;
+
+ dev_dbg(&pdev->xdev->dev, "Attached!\n");
+out:
+ return err;
+}
+
+static int pciback_attach(struct pciback_device *pdev)
+{
+ int err = 0;
+ int gnt_ref, remote_evtchn;
+ char *magic = NULL;
+
+
+ /* Make sure we only do this setup once */
+ if (xenbus_read_driver_state(pdev->xdev->nodename) !=
+ XenbusStateInitialised)
+ goto out;
+
+ /* Wait for frontend to state that it has published the configuration */
+ if (xenbus_read_driver_state(pdev->xdev->otherend) !=
+ XenbusStateInitialised)
+ goto out;
+
+ dev_dbg(&pdev->xdev->dev, "Reading frontend config\n");
+
+ err = xenbus_gather(XBT_NIL, pdev->xdev->otherend,
+ "pci-op-ref", "%u", &gnt_ref,
+ "event-channel", "%u", &remote_evtchn,
+ "magic", NULL, &magic, NULL);
+ if (err) {
+ /* If configuration didn't get read correctly, wait longer */
+ xenbus_dev_fatal(pdev->xdev, err,
+ "Error reading configuration from frontend");
+ goto out;
+ }
+
+ if (magic == NULL || strcmp(magic, XEN_PCI_MAGIC) != 0) {
+ xenbus_dev_fatal(pdev->xdev, -EFAULT,
+ "version mismatch (%s/%s) with pcifront - "
+ "halting pciback",
+ magic, XEN_PCI_MAGIC);
+ goto out;
+ }
+
+ err = pciback_do_attach(pdev, gnt_ref, remote_evtchn);
+ if (err)
+ goto out;
+
+ dev_dbg(&pdev->xdev->dev, "Connecting...\n");
+
+ err = xenbus_switch_state(pdev->xdev, XenbusStateConnected);
+ if (err)
+ xenbus_dev_fatal(pdev->xdev, err,
+ "Error switching to connected state!");
+
+ dev_dbg(&pdev->xdev->dev, "Connected? %d\n", err);
+out:
+
+ kfree(magic);
+
+ return err;
+}
+
+static int pciback_publish_pci_dev(struct pciback_device *pdev,
+ unsigned int domain, unsigned int bus,
+ unsigned int devfn, unsigned int devid)
+{
+ int err;
+ int len;
+ char str[64];
+
+ len = snprintf(str, sizeof(str), "vdev-%d", devid);
+ if (unlikely(len >= (sizeof(str) - 1))) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ err = xenbus_printf(XBT_NIL, pdev->xdev->nodename, str,
+ "%04x:%02x:%02x.%02x", domain, bus,
+ PCI_SLOT(devfn), PCI_FUNC(devfn));
+
+out:
+ return err;
+}
+
+static int pciback_export_device(struct pciback_device *pdev,
+ int domain, int bus, int slot, int func,
+ int devid)
+{
+ struct pci_dev *dev;
+ int err = 0;
+
+ dev_dbg(&pdev->xdev->dev, "exporting dom %x bus %x slot %x func %x\n",
+ domain, bus, slot, func);
+
+ dev = pcistub_get_pci_dev_by_slot(pdev, domain, bus, slot, func);
+ if (!dev) {
+ err = -EINVAL;
+ xenbus_dev_fatal(pdev->xdev, err,
+ "Couldn't locate PCI device "
+ "(%04x:%02x:%02x.%01x)! "
+ "perhaps already in-use?",
+ domain, bus, slot, func);
+ goto out;
+ }
+
+ err = pciback_add_pci_dev(pdev, dev, devid, pciback_publish_pci_dev);
+ if (err)
+ goto out;
+
+ dev_dbg(&dev->dev, "registering for %d\n", pdev->xdev->otherend_id);
+ if (xen_register_device_domain_owner(dev,
+ pdev->xdev->otherend_id) != 0) {
+ dev_err(&dev->dev, "device has been assigned to another " \
+ "domain! Over-writing the ownership, but beware.\n");
+ xen_unregister_device_domain_owner(dev);
+ xen_register_device_domain_owner(dev, pdev->xdev->otherend_id);
+ }
+
+ /* TODO: It'd be nice to export a bridge and have all of its children
+ * get exported with it. This may be best done in xend (which will
+ * have to calculate resource usage anyway) but we probably want to
+ * put something in here to ensure that if a bridge gets given to a
+ * driver domain, that all devices under that bridge are not given
+ * to other driver domains (as he who controls the bridge can disable
+ * it and stop the other devices from working).
+ */
+out:
+ return err;
+}
+
+static int pciback_remove_device(struct pciback_device *pdev,
+ int domain, int bus, int slot, int func)
+{
+ int err = 0;
+ struct pci_dev *dev;
+
+ dev_dbg(&pdev->xdev->dev, "removing dom %x bus %x slot %x func %x\n",
+ domain, bus, slot, func);
+
+ dev = pciback_get_pci_dev(pdev, domain, bus, PCI_DEVFN(slot, func));
+ if (!dev) {
+ err = -EINVAL;
+ dev_dbg(&pdev->xdev->dev, "Couldn't locate PCI device "
+ "(%04x:%02x:%02x.%01x)! not owned by this domain\n",
+ domain, bus, slot, func);
+ goto out;
+ }
+
+ dev_dbg(&dev->dev, "unregistering for %d\n", pdev->xdev->otherend_id);
+ xen_unregister_device_domain_owner(dev);
+
+ pciback_release_pci_dev(pdev, dev);
+
+out:
+ return err;
+}
+
+static int pciback_publish_pci_root(struct pciback_device *pdev,
+ unsigned int domain, unsigned int bus)
+{
+ unsigned int d, b;
+ int i, root_num, len, err;
+ char str[64];
+
+ dev_dbg(&pdev->xdev->dev, "Publishing pci roots\n");
+
+ err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename,
+ "root_num", "%d", &root_num);
+ if (err == 0 || err == -ENOENT)
+ root_num = 0;
+ else if (err < 0)
+ goto out;
+
+ /* Verify that we haven't already published this pci root */
+ for (i = 0; i < root_num; i++) {
+ len = snprintf(str, sizeof(str), "root-%d", i);
+ if (unlikely(len >= (sizeof(str) - 1))) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename,
+ str, "%x:%x", &d, &b);
+ if (err < 0)
+ goto out;
+ if (err != 2) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ if (d == domain && b == bus) {
+ err = 0;
+ goto out;
+ }
+ }
+
+ len = snprintf(str, sizeof(str), "root-%d", root_num);
+ if (unlikely(len >= (sizeof(str) - 1))) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ dev_dbg(&pdev->xdev->dev, "writing root %d at %04x:%02x\n",
+ root_num, domain, bus);
+
+ err = xenbus_printf(XBT_NIL, pdev->xdev->nodename, str,
+ "%04x:%02x", domain, bus);
+ if (err)
+ goto out;
+
+ err = xenbus_printf(XBT_NIL, pdev->xdev->nodename,
+ "root_num", "%d", (root_num + 1));
+
+out:
+ return err;
+}
+
+static int pciback_reconfigure(struct pciback_device *pdev)
+{
+ int err = 0;
+ int num_devs;
+ int domain, bus, slot, func;
+ int substate;
+ int i, len;
+ char state_str[64];
+ char dev_str[64];
+
+
+ dev_dbg(&pdev->xdev->dev, "Reconfiguring device ...\n");
+
+ /* Make sure we only reconfigure once */
+ if (xenbus_read_driver_state(pdev->xdev->nodename) !=
+ XenbusStateReconfiguring)
+ goto out;
+
+ err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename, "num_devs", "%d",
+ &num_devs);
+ if (err != 1) {
+ if (err >= 0)
+ err = -EINVAL;
+ xenbus_dev_fatal(pdev->xdev, err,
+ "Error reading number of devices");
+ goto out;
+ }
+
+ for (i = 0; i < num_devs; i++) {
+ len = snprintf(state_str, sizeof(state_str), "state-%d", i);
+ if (unlikely(len >= (sizeof(state_str) - 1))) {
+ err = -ENOMEM;
+ xenbus_dev_fatal(pdev->xdev, err,
+ "String overflow while reading "
+ "configuration");
+ goto out;
+ }
+ err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename, state_str,
+ "%d", &substate);
+ if (err != 1)
+ substate = XenbusStateUnknown;
+
+ switch (substate) {
+ case XenbusStateInitialising:
+ dev_dbg(&pdev->xdev->dev, "Attaching dev-%d ...\n", i);
+
+ len = snprintf(dev_str, sizeof(dev_str), "dev-%d", i);
+ if (unlikely(len >= (sizeof(dev_str) - 1))) {
+ err = -ENOMEM;
+ xenbus_dev_fatal(pdev->xdev, err,
+ "String overflow while "
+ "reading configuration");
+ goto out;
+ }
+ err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename,
+ dev_str, "%x:%x:%x.%x",
+ &domain, &bus, &slot, &func);
+ if (err < 0) {
+ xenbus_dev_fatal(pdev->xdev, err,
+ "Error reading device "
+ "configuration");
+ goto out;
+ }
+ if (err != 4) {
+ err = -EINVAL;
+ xenbus_dev_fatal(pdev->xdev, err,
+ "Error parsing pci device "
+ "configuration");
+ goto out;
+ }
+
+ err = pciback_export_device(pdev, domain, bus, slot,
+ func, i);
+ if (err)
+ goto out;
+
+ /* Publish pci roots. */
+ err = pciback_publish_pci_roots(pdev,
+ pciback_publish_pci_root);
+ if (err) {
+ xenbus_dev_fatal(pdev->xdev, err,
+ "Error while publish PCI root"
|
|
+ "buses for frontend");
|
|
+ goto out;
+ }
+
+ err = xenbus_printf(XBT_NIL, pdev->xdev->nodename,
+ state_str, "%d",
+ XenbusStateInitialised);
+ if (err) {
+ xenbus_dev_fatal(pdev->xdev, err,
+ "Error switching substate of "
+ "dev-%d\n", i);
+ goto out;
+ }
+ break;
+
+ case XenbusStateClosing:
+ dev_dbg(&pdev->xdev->dev, "Detaching dev-%d ...\n", i);
+
+ len = snprintf(dev_str, sizeof(dev_str), "vdev-%d", i);
+ if (unlikely(len >= (sizeof(dev_str) - 1))) {
+ err = -ENOMEM;
+ xenbus_dev_fatal(pdev->xdev, err,
+ "String overflow while "
+ "reading configuration");
+ goto out;
+ }
+ err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename,
+ dev_str, "%x:%x:%x.%x",
+ &domain, &bus, &slot, &func);
+ if (err < 0) {
+ xenbus_dev_fatal(pdev->xdev, err,
+ "Error reading device "
+ "configuration");
+ goto out;
+ }
+ if (err != 4) {
+ err = -EINVAL;
+ xenbus_dev_fatal(pdev->xdev, err,
+ "Error parsing pci device "
+ "configuration");
+ goto out;
+ }
+
+ err = pciback_remove_device(pdev, domain, bus, slot,
+ func);
+ if (err)
+ goto out;
+
+ /* TODO: If at some point we implement support for pci
+ * root hot-remove on pcifront side, we'll need to
+ * remove unnecessary xenstore nodes of pci roots here.
+ */
+
+ break;
+
+ default:
+ break;
+ }
+ }
+
+ err = xenbus_switch_state(pdev->xdev, XenbusStateReconfigured);
+ if (err) {
+ xenbus_dev_fatal(pdev->xdev, err,
+ "Error switching to reconfigured state!");
+ goto out;
+ }
+
+out:
+ return 0;
+}
+
+static void pciback_frontend_changed(struct xenbus_device *xdev,
+ enum xenbus_state fe_state)
+{
+ struct pciback_device *pdev = dev_get_drvdata(&xdev->dev);
+
+ dev_dbg(&xdev->dev, "fe state changed %d\n", fe_state);
+
+ switch (fe_state) {
+ case XenbusStateInitialised:
+ pciback_attach(pdev);
+ break;
+
+ case XenbusStateReconfiguring:
+ pciback_reconfigure(pdev);
+ break;
+
+ case XenbusStateConnected:
+ /* pcifront switched its state from reconfiguring to connected.
+ * Then switch to connected state.
+ */
+ xenbus_switch_state(xdev, XenbusStateConnected);
+ break;
+
+ case XenbusStateClosing:
+ pciback_disconnect(pdev);
+ xenbus_switch_state(xdev, XenbusStateClosing);
+ break;
+
+ case XenbusStateClosed:
+ pciback_disconnect(pdev);
+ xenbus_switch_state(xdev, XenbusStateClosed);
+ if (xenbus_dev_is_online(xdev))
+ break;
+ /* fall through if not online */
+ case XenbusStateUnknown:
+ dev_dbg(&xdev->dev, "frontend is gone! unregister device\n");
+ device_unregister(&xdev->dev);
+ break;
+
+ default:
+ break;
+ }
+}
+
+static int pciback_setup_backend(struct pciback_device *pdev)
+{
+ /* Get configuration from xend (if available now) */
+ int domain, bus, slot, func;
+ int err = 0;
+ int i, num_devs;
+ char dev_str[64];
+ char state_str[64];
+
+ /* It's possible we could get the call to setup twice, so make sure
+ * we're not already connected.
+ */
+ if (xenbus_read_driver_state(pdev->xdev->nodename) !=
+ XenbusStateInitWait)
+ goto out;
+
+ dev_dbg(&pdev->xdev->dev, "getting be setup\n");
+
+ err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename, "num_devs", "%d",
+ &num_devs);
+ if (err != 1) {
+ if (err >= 0)
+ err = -EINVAL;
+ xenbus_dev_fatal(pdev->xdev, err,
+ "Error reading number of devices");
+ goto out;
+ }
+
+ for (i = 0; i < num_devs; i++) {
+ int l = snprintf(dev_str, sizeof(dev_str), "dev-%d", i);
+ if (unlikely(l >= (sizeof(dev_str) - 1))) {
+ err = -ENOMEM;
+ xenbus_dev_fatal(pdev->xdev, err,
+ "String overflow while reading "
+ "configuration");
+ goto out;
+ }
+
+ err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename, dev_str,
+ "%x:%x:%x.%x", &domain, &bus, &slot, &func);
+ if (err < 0) {
+ xenbus_dev_fatal(pdev->xdev, err,
+ "Error reading device configuration");
+ goto out;
+ }
+ if (err != 4) {
+ err = -EINVAL;
+ xenbus_dev_fatal(pdev->xdev, err,
+ "Error parsing pci device "
+ "configuration");
+ goto out;
+ }
+
+ err = pciback_export_device(pdev, domain, bus, slot, func, i);
+ if (err)
+ goto out;
+
+ /* Switch substate of this device. */
+ l = snprintf(state_str, sizeof(state_str), "state-%d", i);
+ if (unlikely(l >= (sizeof(state_str) - 1))) {
+ err = -ENOMEM;
+ xenbus_dev_fatal(pdev->xdev, err,
+ "String overflow while reading "
+ "configuration");
+ goto out;
+ }
+ err = xenbus_printf(XBT_NIL, pdev->xdev->nodename, state_str,
+ "%d", XenbusStateInitialised);
+ if (err) {
+ xenbus_dev_fatal(pdev->xdev, err, "Error switching "
+ "substate of dev-%d\n", i);
+ goto out;
+ }
+ }
+
+ err = pciback_publish_pci_roots(pdev, pciback_publish_pci_root);
+ if (err) {
+ xenbus_dev_fatal(pdev->xdev, err,
+ "Error while publish PCI root buses "
|
|
+ "for frontend");
|
|
+ goto out;
|
|
+ }
|
|
+
|
|
+ err = xenbus_switch_state(pdev->xdev, XenbusStateInitialised);
|
|
+ if (err)
|
|
+ xenbus_dev_fatal(pdev->xdev, err,
|
|
+ "Error switching to initialised state!");
|
|
+
|
|
+out:
|
|
+ if (!err)
|
|
+ /* see if pcifront is already configured (if not, we'll wait) */
|
|
+ pciback_attach(pdev);
|
|
+
|
|
+ return err;
|
|
+}
|
|
+
|
|
+static void pciback_be_watch(struct xenbus_watch *watch,
|
|
+ const char **vec, unsigned int len)
|
|
+{
|
|
+ struct pciback_device *pdev =
|
|
+ container_of(watch, struct pciback_device, be_watch);
|
|
+
|
|
+ switch (xenbus_read_driver_state(pdev->xdev->nodename)) {
|
|
+ case XenbusStateInitWait:
|
|
+ pciback_setup_backend(pdev);
|
|
+ break;
|
|
+
|
|
+ default:
|
|
+ break;
|
|
+ }
|
|
+}
|
|
+
|
|
+static int pciback_xenbus_probe(struct xenbus_device *dev,
|
|
+ const struct xenbus_device_id *id)
|
|
+{
|
|
+ int err = 0;
|
|
+ struct pciback_device *pdev = alloc_pdev(dev);
|
|
+
|
|
+ if (pdev == NULL) {
|
|
+ err = -ENOMEM;
|
|
+ xenbus_dev_fatal(dev, err,
|
|
+ "Error allocating pciback_device struct");
|
|
+ goto out;
|
|
+ }
|
|
+
|
|
+ /* wait for xend to configure us */
|
|
+ err = xenbus_switch_state(dev, XenbusStateInitWait);
|
|
+ if (err)
|
|
+ goto out;
|
|
+
|
|
+ /* watch the backend node for backend configuration information */
|
|
+ err = xenbus_watch_path(dev, dev->nodename, &pdev->be_watch,
|
|
+ pciback_be_watch);
|
|
+ if (err)
|
|
+ goto out;
|
|
+
|
|
+ pdev->be_watching = 1;
|
|
+
|
|
+ /* We need to force a call to our callback here in case
|
|
+ * xend already configured us!
|
|
+ */
|
|
+ pciback_be_watch(&pdev->be_watch, NULL, 0);
|
|
+
|
|
+out:
|
|
+ return err;
|
|
+}
|
|
+
|
|
+static int pciback_xenbus_remove(struct xenbus_device *dev)
|
|
+{
|
|
+ struct pciback_device *pdev = dev_get_drvdata(&dev->dev);
|
|
+
|
|
+ if (pdev != NULL)
|
|
+ free_pdev(pdev);
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+static const struct xenbus_device_id xenpci_ids[] = {
|
|
+ {"pci"},
|
|
+ {""},
|
|
+};
|
|
+
|
|
+static struct xenbus_driver xenbus_pciback_driver = {
|
|
+ .name = "pciback",
|
|
+ .owner = THIS_MODULE,
|
|
+ .ids = xenpci_ids,
|
|
+ .probe = pciback_xenbus_probe,
|
|
+ .remove = pciback_xenbus_remove,
|
|
+ .otherend_changed = pciback_frontend_changed,
|
|
+};
|
|
+
|
|
+int __init pciback_xenbus_register(void)
|
|
+{
|
|
+ pciback_wq = create_workqueue("pciback_workqueue");
|
|
+ if (!pciback_wq) {
|
|
+ printk(KERN_ERR "%s: create"
|
|
+ "pciback_workqueue failed\n", __func__);
|
|
+ return -EFAULT;
+ }
+ return xenbus_register_backend(&xenbus_pciback_driver);
+}
+
+void __exit pciback_xenbus_unregister(void)
+{
+ destroy_workqueue(pciback_wq);
+ xenbus_unregister_driver(&xenbus_pciback_driver);
+}
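[Editor's aside] pciback_attach() gathers three xenstore keys that the frontend is expected to publish in its own directory. A hedged sketch of the matching writes a pcifront-style frontend would perform; the function name is illustrative and transaction handling is simplified away:

```c
#include <xen/xenbus.h>
#include <xen/interface/io/pciif.h>

/* Illustrative frontend-side counterpart to pciback_attach(): publish
 * the grant ref of the shared page, the event channel, and the magic
 * version string that the backend checks against XEN_PCI_MAGIC. */
static int pcifront_publish_info(struct xenbus_device *xdev,
				 int gnt_ref, int evtchn)
{
	int err;

	err = xenbus_printf(XBT_NIL, xdev->nodename,
			    "pci-op-ref", "%u", gnt_ref);
	if (!err)
		err = xenbus_printf(XBT_NIL, xdev->nodename,
				    "event-channel", "%u", evtchn);
	if (!err)
		err = xenbus_printf(XBT_NIL, xdev->nodename,
				    "magic", XEN_PCI_MAGIC);
	if (!err)
		err = xenbus_switch_state(xdev, XenbusStateInitialised);
	return err;
}
```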
diff --git a/drivers/xen/platform-pci.c b/drivers/xen/platform-pci.c
index afbe041..319dd0a 100644
--- a/drivers/xen/platform-pci.c
+++ b/drivers/xen/platform-pci.c
@@ -156,9 +156,6 @@ static int __devinit platform_pci_init(struct pci_dev *pdev,
 if (ret)
 goto out;
 xenbus_probe(NULL);
- ret = xen_setup_shutdown_event();
- if (ret)
- goto out;
 return 0;

out:
diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h
index 1da8af6..2024a74 100644
--- a/include/drm/ttm/ttm_bo_driver.h
+++ b/include/drm/ttm/ttm_bo_driver.h
@@ -50,13 +50,15 @@ struct ttm_backend_func {
 * @pages: Array of pointers to ttm pages.
 * @dummy_read_page: Page to be used instead of NULL pages in the
 * array @pages.
+ * @dma_addrs: Array of DMA (bus) address of the ttm pages.
 *
 * Populate the backend with ttm pages. Depending on the backend,
 * it may or may not copy the @pages array.
 */
 int (*populate) (struct ttm_backend *backend,
 unsigned long num_pages, struct page **pages,
- struct page *dummy_read_page);
+ struct page *dummy_read_page,
+ dma_addr_t *dma_addrs);
 /**
 * struct ttm_backend_func member clear
 *
@@ -149,6 +151,7 @@ enum ttm_caching_state {
 * @swap_storage: Pointer to shmem struct file for swap storage.
 * @caching_state: The current caching state of the pages.
 * @state: The current binding state of the pages.
+ * @dma_address: The DMA (bus) addresses of the pages (if TTM_PAGE_FLAG_DMA32)
 *
 * This is a structure holding the pages, caching- and aperture binding
 * status for a buffer object that isn't backed by fixed (VRAM / AGP)
@@ -173,6 +176,8 @@ struct ttm_tt {
 tt_unbound,
 tt_unpopulated,
 } state;
+ dma_addr_t *dma_address;
+ struct device *dev;
};

#define TTM_MEMTYPE_FLAG_FIXED (1 << 0) /* Fixed (on-card) PCI memory */
@@ -547,6 +552,7 @@ struct ttm_bo_device {
 struct list_head device_list;
 struct ttm_bo_global *glob;
 struct ttm_bo_driver *driver;
+ struct device *dev;
 rwlock_t vm_lock;
 struct ttm_mem_type_manager man[TTM_NUM_MEM_TYPES];
 spinlock_t fence_lock;
@@ -787,6 +793,8 @@ extern int ttm_bo_device_release(struct ttm_bo_device *bdev);
 * @file_page_offset: Offset into the device address space that is available
 * for buffer data. This ensures compatibility with other users of the
 * address space.
+ * @need_dma32: Allocate pages under 4GB
+ * @dev: 'struct device' of the PCI device.
 *
 * Initializes a struct ttm_bo_device:
 * Returns:
@@ -795,7 +803,8 @@ extern int ttm_bo_device_release(struct ttm_bo_device *bdev);
extern int ttm_bo_device_init(struct ttm_bo_device *bdev,
 struct ttm_bo_global *glob,
 struct ttm_bo_driver *driver,
- uint64_t file_page_offset, bool need_dma32);
+ uint64_t file_page_offset, bool need_dma32,
+ struct device *dev);

/**
 * ttm_bo_unmap_virtual
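[Editor's aside] With this interface change, a TTM backend's populate() hook also receives the per-page DMA addresses. A hedged sketch of a backend implementation that keeps both arrays for later use at bind time; the `my_` struct and names are illustrative, not from any real driver:

```c
#include <drm/ttm/ttm_bo_driver.h>
#include <linux/kernel.h>

/* Illustrative TTM backend: keep both the page pointers and the DMA
 * (bus) addresses handed in by the allocator. */
struct my_ttm_backend {
	struct ttm_backend backend;
	struct page **pages;
	dma_addr_t *dma_addrs;
	unsigned long num_pages;
};

static int my_populate(struct ttm_backend *backend,
		       unsigned long num_pages, struct page **pages,
		       struct page *dummy_read_page,
		       dma_addr_t *dma_addrs)
{
	struct my_ttm_backend *mb =
		container_of(backend, struct my_ttm_backend, backend);

	/* The doc comment allows a backend to reference rather than copy
	 * the @pages array; this sketch just keeps the pointers. */
	mb->pages = pages;
	mb->dma_addrs = dma_addrs;
	mb->num_pages = num_pages;
	return 0;
}
```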
diff --git a/include/drm/ttm/ttm_page_alloc.h b/include/drm/ttm/ttm_page_alloc.h
index 1168214..ccb6b7a 100644
--- a/include/drm/ttm/ttm_page_alloc.h
+++ b/include/drm/ttm/ttm_page_alloc.h
@@ -36,11 +36,15 @@
 * @flags: ttm flags for page allocation.
 * @cstate: ttm caching state for the page.
 * @count: number of pages to allocate.
+ * @dma_address: The DMA (bus) address of pages (if TTM_PAGE_FLAG_DMA32 set).
+ * @dev: struct device for appropriate DMA accounting.
 */
int ttm_get_pages(struct list_head *pages,
 int flags,
 enum ttm_caching_state cstate,
- unsigned count);
+ unsigned count,
+ dma_addr_t *dma_address,
+ struct device *dev);
/**
 * Put linked list of pages to pool.
 *
@@ -49,11 +53,15 @@ int ttm_get_pages(struct list_head *pages,
 * count.
 * @flags: ttm flags for page allocation.
 * @cstate: ttm caching state.
+ * @dma_address: The DMA (bus) address of pages (if TTM_PAGE_FLAG_DMA32 set).
+ * @dev: struct device for appropriate DMA accounting.
 */
void ttm_put_pages(struct list_head *pages,
 unsigned page_count,
 int flags,
- enum ttm_caching_state cstate);
+ enum ttm_caching_state cstate,
+ dma_addr_t *dma_address,
+ struct device *dev);
/**
 * Initialize pool allocator.
 */
diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index 55e0d42..d746da1 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -55,7 +55,7 @@
 * Used by threaded interrupts which need to keep the
 * irq line disabled until the threaded handler has been run.
 * IRQF_NO_SUSPEND - Do not disable this IRQ during suspend
- *
+ * IRQF_FORCE_RESUME - Force enable it on resume even if IRQF_NO_SUSPEND is set
 */
#define IRQF_DISABLED 0x00000020
#define IRQF_SAMPLE_RANDOM 0x00000040
@@ -67,6 +67,7 @@
#define IRQF_IRQPOLL 0x00001000
#define IRQF_ONESHOT 0x00002000
#define IRQF_NO_SUSPEND 0x00004000
+#define IRQF_FORCE_RESUME 0x00008000

#define IRQF_TIMER (__IRQF_TIMER | IRQF_NO_SUSPEND)

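[Editor's aside] IRQF_FORCE_RESUME is for IRQs that stay enabled across suspend yet still need an explicit re-enable on resume; Xen event-channel IRQs are the motivating case in this series. A hedged sketch of how a driver would request such an IRQ; the handler and names are illustrative:

```c
#include <linux/interrupt.h>

static irqreturn_t my_evt_handler(int irq, void *dev_id)
{
	/* ... acknowledge and handle the event ... */
	return IRQ_HANDLED;
}

static int my_bind_irq(int irq, void *dev_id)
{
	/* Keep the line live during suspend, but have the core force a
	 * re-enable on resume even though it was never disabled. */
	return request_irq(irq, my_evt_handler,
			   IRQF_NO_SUSPEND | IRQF_FORCE_RESUME,
			   "my-evtchn", dev_id);
}
```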
diff --git a/include/xen/blkif.h b/include/xen/blkif.h
new file mode 100644
index 0000000..ab79426
--- /dev/null
+++ b/include/xen/blkif.h
@@ -0,0 +1,122 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef __XEN_BLKIF_H__
+#define __XEN_BLKIF_H__
+
+#include <xen/interface/io/ring.h>
+#include <xen/interface/io/blkif.h>
+#include <xen/interface/io/protocols.h>
+
+/* Not a real protocol. Used to generate ring structs which contain
+ * the elements common to all protocols only. This way we get a
+ * compiler-checkable way to use common struct elements, so we can
+ * avoid using switch(protocol) in a number of places. */
+struct blkif_common_request {
+ char dummy;
+};
+struct blkif_common_response {
+ char dummy;
+};
+
+/* i386 protocol version */
+#pragma pack(push, 4)
+struct blkif_x86_32_request {
+ uint8_t operation; /* BLKIF_OP_??? */
+ uint8_t nr_segments; /* number of segments */
+ blkif_vdev_t handle; /* only for read/write requests */
+ uint64_t id; /* private guest value, echoed in resp */
+ blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */
+ struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+};
+struct blkif_x86_32_response {
+ uint64_t id; /* copied from request */
+ uint8_t operation; /* copied from request */
+ int16_t status; /* BLKIF_RSP_??? */
+};
+typedef struct blkif_x86_32_request blkif_x86_32_request_t;
+typedef struct blkif_x86_32_response blkif_x86_32_response_t;
+#pragma pack(pop)
+
+/* x86_64 protocol version */
+struct blkif_x86_64_request {
+ uint8_t operation; /* BLKIF_OP_??? */
+ uint8_t nr_segments; /* number of segments */
+ blkif_vdev_t handle; /* only for read/write requests */
+ uint64_t __attribute__((__aligned__(8))) id;
+ blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */
+ struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+};
+struct blkif_x86_64_response {
+ uint64_t __attribute__((__aligned__(8))) id;
+ uint8_t operation; /* copied from request */
+ int16_t status; /* BLKIF_RSP_??? */
+};
+typedef struct blkif_x86_64_request blkif_x86_64_request_t;
+typedef struct blkif_x86_64_response blkif_x86_64_response_t;
+
+DEFINE_RING_TYPES(blkif_common, struct blkif_common_request, struct blkif_common_response);
+DEFINE_RING_TYPES(blkif_x86_32, struct blkif_x86_32_request, struct blkif_x86_32_response);
+DEFINE_RING_TYPES(blkif_x86_64, struct blkif_x86_64_request, struct blkif_x86_64_response);
+
+union blkif_back_rings {
+ struct blkif_back_ring native;
+ struct blkif_common_back_ring common;
+ struct blkif_x86_32_back_ring x86_32;
+ struct blkif_x86_64_back_ring x86_64;
+};
+
+enum blkif_protocol {
+ BLKIF_PROTOCOL_NATIVE = 1,
+ BLKIF_PROTOCOL_X86_32 = 2,
+ BLKIF_PROTOCOL_X86_64 = 3,
+};
+
+static void inline blkif_get_x86_32_req(struct blkif_request *dst, struct blkif_x86_32_request *src)
+{
+ int i, n = BLKIF_MAX_SEGMENTS_PER_REQUEST;
+ dst->operation = src->operation;
+ dst->nr_segments = src->nr_segments;
+ dst->handle = src->handle;
+ dst->id = src->id;
+ dst->u.rw.sector_number = src->sector_number;
+ barrier();
+ if (n > dst->nr_segments)
+ n = dst->nr_segments;
+ for (i = 0; i < n; i++)
+ dst->u.rw.seg[i] = src->seg[i];
+}
+
+static void inline blkif_get_x86_64_req(struct blkif_request *dst, struct blkif_x86_64_request *src)
+{
+ int i, n = BLKIF_MAX_SEGMENTS_PER_REQUEST;
+ dst->operation = src->operation;
+ dst->nr_segments = src->nr_segments;
+ dst->handle = src->handle;
+ dst->id = src->id;
+ dst->u.rw.sector_number = src->sector_number;
+ barrier();
+ if (n > dst->nr_segments)
+ n = dst->nr_segments;
+ for (i = 0; i < n; i++)
+ dst->u.rw.seg[i] = src->seg[i];
+}
+
+#endif /* __XEN_BLKIF_H__ */
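[Editor's aside] union blkif_back_rings lets a backend map one shared page and then initialise whichever ABI layout the frontend negotiated. A hedged sketch of that dispatch using the BACK_RING_INIT macro from <xen/interface/io/ring.h>; the function name is illustrative:

```c
#include <xen/blkif.h>
#include <linux/mm.h>	/* PAGE_SIZE */

/* Illustrative backend-side ring setup: shared_page is the mapped
 * grant page; protocol was negotiated over xenstore. */
static void my_init_back_ring(union blkif_back_rings *rings,
			      enum blkif_protocol protocol,
			      void *shared_page)
{
	switch (protocol) {
	case BLKIF_PROTOCOL_NATIVE:
		BACK_RING_INIT(&rings->native,
			       (struct blkif_sring *)shared_page, PAGE_SIZE);
		break;
	case BLKIF_PROTOCOL_X86_32:
		BACK_RING_INIT(&rings->x86_32,
			       (struct blkif_x86_32_sring *)shared_page,
			       PAGE_SIZE);
		break;
	case BLKIF_PROTOCOL_X86_64:
		BACK_RING_INIT(&rings->x86_64,
			       (struct blkif_x86_64_sring *)shared_page,
			       PAGE_SIZE);
		break;
	}
}
```

Requests pulled from the 32-bit or 64-bit rings are then converted to the native layout with the blkif_get_x86_32_req/blkif_get_x86_64_req helpers defined above.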
diff --git a/include/xen/events.h b/include/xen/events.h
index 00f53dd..a0c8185 100644
--- a/include/xen/events.h
+++ b/include/xen/events.h
@@ -23,6 +23,12 @@ int bind_ipi_to_irqhandler(enum ipi_vector ipi,
                           unsigned long irqflags,
                           const char *devname,
                           void *dev_id);
+int bind_interdomain_evtchn_to_irqhandler(unsigned int remote_domain,
+                                          unsigned int remote_port,
+                                          irq_handler_t handler,
+                                          unsigned long irqflags,
+                                          const char *devname,
+                                          void *dev_id);
 
 /*
  * Common unbind function for all event sources. Takes IRQ to unbind from.
@@ -75,11 +81,10 @@ int xen_allocate_pirq(unsigned gsi, int shareable, char *name);
 int xen_map_pirq_gsi(unsigned pirq, unsigned gsi, int shareable, char *name);
 
 #ifdef CONFIG_PCI_MSI
-/* Allocate an irq and a pirq to be used with MSIs. */
-#define XEN_ALLOC_PIRQ (1 << 0)
-#define XEN_ALLOC_IRQ (1 << 1)
-void xen_allocate_pirq_msi(char *name, int *irq, int *pirq, int alloc_mask);
-int xen_create_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int type);
+int xen_allocate_pirq_msi(struct pci_dev *dev, struct msi_desc *msidesc);
+int xen_bind_pirq_msi_to_irq(struct pci_dev *dev, struct msi_desc *msidesc,
+                             int pirq, int vector, const char *name,
+                             domid_t domid);
 #endif
 
 /* De-allocates the above mentioned physical interrupt. */
@@ -94,4 +99,10 @@ int xen_gsi_from_irq(unsigned pirq);
 /* Return irq from pirq */
 int xen_irq_from_pirq(unsigned pirq);
 
+/* Return the pirq allocated to the irq. */
+int xen_pirq_from_irq(unsigned irq);
+
+/* Determine whether to ignore this IRQ if it is passed to a guest. */
+int xen_test_irq_shared(int irq);
+
 #endif /* _XEN_EVENTS_H */
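
The newly exported bind_interdomain_evtchn_to_irqhandler() is the backend-side convenience: given the peer domain's id and the event-channel port it advertised (typically through xenstore), it binds the channel and installs the handler in one step, returning the Linux irq number on success or a negative errno. A hedged usage sketch; the handler and structure fields are illustrative, not from this patch:

    static irqreturn_t be_interrupt(int irq, void *dev_id);   /* illustrative */

    static int backend_connect(struct backend_info *be,
                               unsigned int remote_domid, unsigned int evtchn)
    {
            int err;

            err = bind_interdomain_evtchn_to_irqhandler(remote_domid, evtchn,
                                                        be_interrupt, 0,
                                                        "xen-backend", be);
            if (err < 0)
                    return err;
            be->irq = err;  /* a positive return value is the bound irq */
            return 0;
    }
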
diff --git a/include/xen/gntalloc.h b/include/xen/gntalloc.h
new file mode 100644
index 0000000..76bd580
--- /dev/null
+++ b/include/xen/gntalloc.h
@@ -0,0 +1,82 @@
+/******************************************************************************
+ * gntalloc.h
+ *
+ * Interface to /dev/xen/gntalloc.
+ *
+ * Author: Daniel De Graaf <dgdegra@tycho.nsa.gov>
+ *
+ * This file is in the public domain.
+ */
+
+#ifndef __LINUX_PUBLIC_GNTALLOC_H__
+#define __LINUX_PUBLIC_GNTALLOC_H__
+
+/*
+ * Allocates a new page and creates a new grant reference.
+ */
+#define IOCTL_GNTALLOC_ALLOC_GREF \
+_IOC(_IOC_NONE, 'G', 5, sizeof(struct ioctl_gntalloc_alloc_gref))
+struct ioctl_gntalloc_alloc_gref {
+        /* IN parameters */
+        /* The ID of the domain to be given access to the grants. */
+        uint16_t domid;
+        /* Flags for this mapping */
+        uint16_t flags;
+        /* Number of pages to map */
+        uint32_t count;
+        /* OUT parameters */
+        /* The offset to be used on a subsequent call to mmap(). */
+        uint64_t index;
+        /* The grant references of the newly created grant, one per page */
+        /* Variable size, depending on count */
+        uint32_t gref_ids[1];
+};
+
+#define GNTALLOC_FLAG_WRITABLE 1
+
+/*
+ * Deallocates the grant reference, allowing the associated page to be freed if
+ * no other domains are using it.
+ */
+#define IOCTL_GNTALLOC_DEALLOC_GREF \
+_IOC(_IOC_NONE, 'G', 6, sizeof(struct ioctl_gntalloc_dealloc_gref))
+struct ioctl_gntalloc_dealloc_gref {
+        /* IN parameters */
+        /* The offset returned in the map operation */
+        uint64_t index;
+        /* Number of references to unmap */
+        uint32_t count;
+};
+
+/*
+ * Sets up an unmap notification within the page, so that the other side can do
+ * cleanup if this side crashes. Required to implement cross-domain robust
+ * mutexes or close notification on communication channels.
+ *
+ * Each mapped page only supports one notification; multiple calls referring to
+ * the same page overwrite the previous notification. You must clear the
+ * notification prior to the IOCTL_GNTALLOC_DEALLOC_GREF if you do not want it
+ * to occur.
+ */
+#define IOCTL_GNTALLOC_SET_UNMAP_NOTIFY \
+_IOC(_IOC_NONE, 'G', 7, sizeof(struct ioctl_gntalloc_unmap_notify))
+struct ioctl_gntalloc_unmap_notify {
+        /* IN parameters */
+        /* Offset in the file descriptor for a byte within the page (same as
+         * used in mmap). If using UNMAP_NOTIFY_CLEAR_BYTE, this is the byte to
+         * be cleared. Otherwise, it can be any byte in the page whose
+         * notification we are adjusting.
+         */
+        uint64_t index;
+        /* Action(s) to take on unmap */
+        uint32_t action;
+        /* Event channel to notify */
+        uint32_t event_channel_port;
+};
+
+/* Clear (set to zero) the byte specified by index */
+#define UNMAP_NOTIFY_CLEAR_BYTE 0x1
+/* Send an interrupt on the indicated event channel */
+#define UNMAP_NOTIFY_SEND_EVENT 0x2
+
+#endif /* __LINUX_PUBLIC_GNTALLOC_H__ */
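
From user space the intended flow is: open /dev/xen/gntalloc, issue IOCTL_GNTALLOC_ALLOC_GREF to obtain grant references plus an mmap offset, map the page(s), then hand the gref_ids to the peer domain out of band. A hedged sketch for a single page, assuming the header above is installed and with error handling trimmed; peer_domid and the hand-off mechanism (e.g. xenstore) sit outside this interface:

    #include <stdint.h>
    #include <fcntl.h>
    #include <sys/ioctl.h>
    #include <sys/mman.h>
    #include <xen/gntalloc.h>

    void *share_one_page(uint16_t peer_domid, uint32_t *gref_out)
    {
            int fd = open("/dev/xen/gntalloc", O_RDWR);
            struct ioctl_gntalloc_alloc_gref arg = {
                    .domid = peer_domid,
                    .flags = GNTALLOC_FLAG_WRITABLE,
                    .count = 1,
            };

            if (fd < 0 || ioctl(fd, IOCTL_GNTALLOC_ALLOC_GREF, &arg))
                    return NULL;
            *gref_out = arg.gref_ids[0];  /* publish this to the peer */
            return mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_SHARED,
                        fd, arg.index);
    }
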
diff --git a/include/xen/gntdev.h b/include/xen/gntdev.h
index eb23f41..5304bd3 100644
--- a/include/xen/gntdev.h
+++ b/include/xen/gntdev.h
@@ -116,4 +116,35 @@ struct ioctl_gntdev_set_max_grants {
         uint32_t count;
 };
 
+/*
+ * Sets up an unmap notification within the page, so that the other side can do
+ * cleanup if this side crashes. Required to implement cross-domain robust
+ * mutexes or close notification on communication channels.
+ *
+ * Each mapped page only supports one notification; multiple calls referring to
+ * the same page overwrite the previous notification. You must clear the
+ * notification prior to the IOCTL_GNTALLOC_DEALLOC_GREF if you do not want it
+ * to occur.
+ */
+#define IOCTL_GNTDEV_SET_UNMAP_NOTIFY \
+_IOC(_IOC_NONE, 'G', 7, sizeof(struct ioctl_gntdev_unmap_notify))
+struct ioctl_gntdev_unmap_notify {
+        /* IN parameters */
+        /* Offset in the file descriptor for a byte within the page (same as
+         * used in mmap). If using UNMAP_NOTIFY_CLEAR_BYTE, this is the byte to
+         * be cleared. Otherwise, it can be any byte in the page whose
+         * notification we are adjusting.
+         */
+        uint64_t index;
+        /* Action(s) to take on unmap */
+        uint32_t action;
+        /* Event channel to notify */
+        uint32_t event_channel_port;
+};
+
+/* Clear (set to zero) the byte specified by index */
+#define UNMAP_NOTIFY_CLEAR_BYTE 0x1
+/* Send an interrupt on the indicated event channel */
+#define UNMAP_NOTIFY_SEND_EVENT 0x2
+
 #endif /* __LINUX_PUBLIC_GNTDEV_H__ */
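
Usage mirrors the gntalloc variant above, from the mapping side: after mmap()ing a foreign grant through gntdev, register one notify action per page so the granting domain learns of an unmap or a crash. A hedged sketch; fd, map_index and the choice of byte are assumptions for illustration:

    static int arm_unmap_notify(int fd, uint64_t map_index, uint32_t port)
    {
            struct ioctl_gntdev_unmap_notify notify = {
                    /* assumed layout: last byte of the page is a liveness flag */
                    .index = map_index + 4095,
                    .action = UNMAP_NOTIFY_CLEAR_BYTE | UNMAP_NOTIFY_SEND_EVENT,
                    .event_channel_port = port,  /* channel shared with the peer */
            };

            return ioctl(fd, IOCTL_GNTDEV_SET_UNMAP_NOTIFY, &notify);
    }
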
diff --git a/include/xen/interface/io/blkif.h b/include/xen/interface/io/blkif.h
index c2d1fa4..61e523a 100644
--- a/include/xen/interface/io/blkif.h
+++ b/include/xen/interface/io/blkif.h
@@ -51,11 +51,7 @@ typedef uint64_t blkif_sector_t;
  */
 #define BLKIF_MAX_SEGMENTS_PER_REQUEST 11
 
-struct blkif_request {
-        uint8_t operation;        /* BLKIF_OP_??? */
-        uint8_t nr_segments;      /* number of segments */
-        blkif_vdev_t handle;      /* only for read/write requests */
-        uint64_t id;              /* private guest value, echoed in resp */
+struct blkif_request_rw {
         blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */
         struct blkif_request_segment {
                 grant_ref_t gref;        /* reference to I/O buffer frame */
@@ -65,6 +61,16 @@ struct blkif_request {
         } seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
 };
 
+struct blkif_request {
+        uint8_t operation;        /* BLKIF_OP_??? */
+        uint8_t nr_segments;      /* number of segments */
+        blkif_vdev_t handle;      /* only for read/write requests */
+        uint64_t id;              /* private guest value, echoed in resp */
+        union {
+                struct blkif_request_rw rw;
+        } u;
+};
+
 struct blkif_response {
         uint64_t id;              /* copied from request */
         uint8_t operation;        /* copied from request */
@@ -91,4 +97,25 @@ DEFINE_RING_TYPES(blkif, struct blkif_request, struct blkif_response);
 #define VDISK_REMOVABLE 0x2
 #define VDISK_READONLY 0x4
 
+/* Xen-defined major numbers for virtual disks, they look strangely
+ * familiar */
+#define XEN_IDE0_MAJOR 3
+#define XEN_IDE1_MAJOR 22
+#define XEN_SCSI_DISK0_MAJOR 8
+#define XEN_SCSI_DISK1_MAJOR 65
+#define XEN_SCSI_DISK2_MAJOR 66
+#define XEN_SCSI_DISK3_MAJOR 67
+#define XEN_SCSI_DISK4_MAJOR 68
+#define XEN_SCSI_DISK5_MAJOR 69
+#define XEN_SCSI_DISK6_MAJOR 70
+#define XEN_SCSI_DISK7_MAJOR 71
+#define XEN_SCSI_DISK8_MAJOR 128
+#define XEN_SCSI_DISK9_MAJOR 129
+#define XEN_SCSI_DISK10_MAJOR 130
+#define XEN_SCSI_DISK11_MAJOR 131
+#define XEN_SCSI_DISK12_MAJOR 132
+#define XEN_SCSI_DISK13_MAJOR 133
+#define XEN_SCSI_DISK14_MAJOR 134
+#define XEN_SCSI_DISK15_MAJOR 135
+
 #endif /* __XEN_PUBLIC_IO_BLKIF_H__ */
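
The practical effect of the new union on a frontend: the read/write fields move behind u.rw, so request construction becomes req->u.rw.sector_number and req->u.rw.seg[], while operation, nr_segments, handle and id stay where they were. A hedged sketch of queuing a one-segment read; the blkfront_info fields and ring bookkeeping are assumed context, not part of this header:

    static void fill_read_req(struct blkfront_info *info, uint64_t id,
                              blkif_sector_t sector, grant_ref_t gref)
    {
            struct blkif_request *req =
                    RING_GET_REQUEST(&info->ring, info->ring.req_prod_pvt);

            req->operation = BLKIF_OP_READ;
            req->handle = info->handle;        /* assumed field */
            req->id = id;
            req->nr_segments = 1;
            req->u.rw.sector_number = sector;  /* was req->sector_number */
            req->u.rw.seg[0].gref = gref;      /* was req->seg[0].gref */
            req->u.rw.seg[0].first_sect = 0;
            req->u.rw.seg[0].last_sect = 7;    /* 8 x 512-byte sectors = one page */
            info->ring.req_prod_pvt++;
    }
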
diff --git a/include/xen/interface/io/netif.h b/include/xen/interface/io/netif.h
index 518481c..cb94668 100644
--- a/include/xen/interface/io/netif.h
+++ b/include/xen/interface/io/netif.h
@@ -22,50 +22,50 @@
 
 /*
  * This is the 'wire' format for packets:
- * Request 1: netif_tx_request -- NETTXF_* (any flags)
- * [Request 2: netif_tx_extra] (only if request 1 has NETTXF_extra_info)
- * [Request 3: netif_tx_extra] (only if request 2 has XEN_NETIF_EXTRA_MORE)
- * Request 4: netif_tx_request -- NETTXF_more_data
- * Request 5: netif_tx_request -- NETTXF_more_data
+ * Request 1: xen_netif_tx_request -- XEN_NETTXF_* (any flags)
+ * [Request 2: xen_netif_extra_info] (only if request 1 has XEN_NETTXF_extra_info)
+ * [Request 3: xen_netif_extra_info] (only if request 2 has XEN_NETIF_EXTRA_MORE)
+ * Request 4: xen_netif_tx_request -- XEN_NETTXF_more_data
+ * Request 5: xen_netif_tx_request -- XEN_NETTXF_more_data
  * ...
- * Request N: netif_tx_request -- 0
+ * Request N: xen_netif_tx_request -- 0
  */
 
 /* Protocol checksum field is blank in the packet (hardware offload)? */
-#define _NETTXF_csum_blank (0)
-#define NETTXF_csum_blank (1U<<_NETTXF_csum_blank)
+#define _XEN_NETTXF_csum_blank (0)
+#define XEN_NETTXF_csum_blank (1U<<_XEN_NETTXF_csum_blank)
 
 /* Packet data has been validated against protocol checksum. */
-#define _NETTXF_data_validated (1)
-#define NETTXF_data_validated (1U<<_NETTXF_data_validated)
+#define _XEN_NETTXF_data_validated (1)
+#define XEN_NETTXF_data_validated (1U<<_XEN_NETTXF_data_validated)
 
 /* Packet continues in the next request descriptor. */
-#define _NETTXF_more_data (2)
-#define NETTXF_more_data (1U<<_NETTXF_more_data)
+#define _XEN_NETTXF_more_data (2)
+#define XEN_NETTXF_more_data (1U<<_XEN_NETTXF_more_data)
 
 /* Packet to be followed by extra descriptor(s). */
-#define _NETTXF_extra_info (3)
-#define NETTXF_extra_info (1U<<_NETTXF_extra_info)
+#define _XEN_NETTXF_extra_info (3)
+#define XEN_NETTXF_extra_info (1U<<_XEN_NETTXF_extra_info)
 
 struct xen_netif_tx_request {
         grant_ref_t gref;        /* Reference to buffer page */
         uint16_t offset;         /* Offset within buffer page */
-        uint16_t flags;          /* NETTXF_* */
+        uint16_t flags;          /* XEN_NETTXF_* */
         uint16_t id;             /* Echoed in response message. */
         uint16_t size;           /* Packet size in bytes. */
 };
 
-/* Types of netif_extra_info descriptors. */
-#define XEN_NETIF_EXTRA_TYPE_NONE (0) /* Never used - invalid */
-#define XEN_NETIF_EXTRA_TYPE_GSO (1) /* u.gso */
-#define XEN_NETIF_EXTRA_TYPE_MAX (2)
+/* Types of xen_netif_extra_info descriptors. */
+#define XEN_NETIF_EXTRA_TYPE_NONE (0) /* Never used - invalid */
+#define XEN_NETIF_EXTRA_TYPE_GSO (1) /* u.gso */
+#define XEN_NETIF_EXTRA_TYPE_MAX (2)
 
-/* netif_extra_info flags. */
-#define _XEN_NETIF_EXTRA_FLAG_MORE (0)
-#define XEN_NETIF_EXTRA_FLAG_MORE (1U<<_XEN_NETIF_EXTRA_FLAG_MORE)
+/* xen_netif_extra_info flags. */
+#define _XEN_NETIF_EXTRA_FLAG_MORE (0)
+#define XEN_NETIF_EXTRA_FLAG_MORE (1U<<_XEN_NETIF_EXTRA_FLAG_MORE)
 
 /* GSO types - only TCPv4 currently supported. */
-#define XEN_NETIF_GSO_TYPE_TCPV4 (1)
+#define XEN_NETIF_GSO_TYPE_TCPV4 (1)
 
 /*
  * This structure needs to fit within both netif_tx_request and
@@ -107,7 +107,7 @@ struct xen_netif_extra_info {
 
 struct xen_netif_tx_response {
         uint16_t id;
-        int16_t status;          /* NETIF_RSP_* */
+        int16_t status;          /* XEN_NETIF_RSP_* */
 };
 
 struct xen_netif_rx_request {
@@ -116,25 +116,29 @@ struct xen_netif_rx_request {
 };
 
 /* Packet data has been validated against protocol checksum. */
-#define _NETRXF_data_validated (0)
-#define NETRXF_data_validated (1U<<_NETRXF_data_validated)
+#define _XEN_NETRXF_data_validated (0)
+#define XEN_NETRXF_data_validated (1U<<_XEN_NETRXF_data_validated)
 
 /* Protocol checksum field is blank in the packet (hardware offload)? */
-#define _NETRXF_csum_blank (1)
-#define NETRXF_csum_blank (1U<<_NETRXF_csum_blank)
+#define _XEN_NETRXF_csum_blank (1)
+#define XEN_NETRXF_csum_blank (1U<<_XEN_NETRXF_csum_blank)
 
 /* Packet continues in the next request descriptor. */
-#define _NETRXF_more_data (2)
-#define NETRXF_more_data (1U<<_NETRXF_more_data)
+#define _XEN_NETRXF_more_data (2)
+#define XEN_NETRXF_more_data (1U<<_XEN_NETRXF_more_data)
 
 /* Packet to be followed by extra descriptor(s). */
-#define _NETRXF_extra_info (3)
-#define NETRXF_extra_info (1U<<_NETRXF_extra_info)
+#define _XEN_NETRXF_extra_info (3)
+#define XEN_NETRXF_extra_info (1U<<_XEN_NETRXF_extra_info)
+
+/* GSO Prefix descriptor. */
+#define _XEN_NETRXF_gso_prefix (4)
+#define XEN_NETRXF_gso_prefix (1U<<_XEN_NETRXF_gso_prefix)
 
 struct xen_netif_rx_response {
         uint16_t id;
         uint16_t offset;         /* Offset in page of start of received packet */
-        uint16_t flags;          /* NETRXF_* */
+        uint16_t flags;          /* XEN_NETRXF_* */
         int16_t status;          /* -ve: BLKIF_RSP_* ; +ve: Rx'ed pkt size. */
 };
 
@@ -149,10 +153,10 @@ DEFINE_RING_TYPES(xen_netif_rx,
                   struct xen_netif_rx_request,
                   struct xen_netif_rx_response);
 
-#define NETIF_RSP_DROPPED -2
-#define NETIF_RSP_ERROR -1
-#define NETIF_RSP_OKAY 0
-/* No response: used for auxiliary requests (e.g., netif_tx_extra). */
-#define NETIF_RSP_NULL 1
+#define XEN_NETIF_RSP_DROPPED -2
+#define XEN_NETIF_RSP_ERROR -1
+#define XEN_NETIF_RSP_OKAY 0
+/* No response: used for auxiliary requests (e.g., xen_netif_extra_info). */
+#define XEN_NETIF_RSP_NULL 1
 
 #endif
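
The renames here are mechanical (the NETTXF_*, NETRXF_* and NETIF_RSP_* families gain a XEN_ prefix), but they touch every flag test in netfront and netback. As a hedged sketch, receive-side checksum handling under the new names looks like this; the function itself is illustrative, not part of this patch:

    static void rx_set_csum(struct sk_buff *skb,
                            const struct xen_netif_rx_response *rx)
    {
            if (rx->flags & XEN_NETRXF_csum_blank)
                    skb->ip_summed = CHECKSUM_PARTIAL;      /* csum still to fill */
            else if (rx->flags & XEN_NETRXF_data_validated)
                    skb->ip_summed = CHECKSUM_UNNECESSARY;  /* peer validated it */
            else
                    skb->ip_summed = CHECKSUM_NONE;
    }
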
diff --git a/include/xen/interface/xen.h b/include/xen/interface/xen.h
index 2befa3e..b33257b 100644
--- a/include/xen/interface/xen.h
+++ b/include/xen/interface/xen.h
@@ -30,7 +30,7 @@
 #define __HYPERVISOR_stack_switch 3
 #define __HYPERVISOR_set_callbacks 4
 #define __HYPERVISOR_fpu_taskswitch 5
-#define __HYPERVISOR_sched_op 6
+#define __HYPERVISOR_sched_op_compat 6
 #define __HYPERVISOR_dom0_op 7
 #define __HYPERVISOR_set_debugreg 8
 #define __HYPERVISOR_get_debugreg 9
@@ -52,7 +52,7 @@
 #define __HYPERVISOR_mmuext_op 26
 #define __HYPERVISOR_acm_op 27
 #define __HYPERVISOR_nmi_op 28
-#define __HYPERVISOR_sched_op_new 29
+#define __HYPERVISOR_sched_op 29
 #define __HYPERVISOR_callback_op 30
 #define __HYPERVISOR_xenoprof_op 31
 #define __HYPERVISOR_event_channel_op 32
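
Callers are insulated from this renumbering: they invoke the HYPERVISOR_sched_op() wrapper by name, and only the constant behind it moves from 6 to 29, with slot 6 living on as the _compat entry for the older calling convention. A hedged sketch of an ordinary caller, unchanged by this hunk (the function name is illustrative; SCHEDOP_block comes from the sched.h interface header):

    static void block_vcpu_until_event(void)
    {
            /* SCHEDOP_block takes no argument structure; NULL is correct. */
            HYPERVISOR_sched_op(SCHEDOP_block, NULL);
    }
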
diff --git a/include/xen/xen-ops.h b/include/xen/xen-ops.h
index 98b9215..03c85d7 100644
--- a/include/xen/xen-ops.h
+++ b/include/xen/xen-ops.h
@@ -5,9 +5,9 @@
 
 DECLARE_PER_CPU(struct vcpu_info *, xen_vcpu);
 
-void xen_pre_suspend(void);
-void xen_post_suspend(int suspend_cancelled);
-void xen_hvm_post_suspend(int suspend_cancelled);
+void xen_arch_pre_suspend(void);
+void xen_arch_post_suspend(int suspend_cancelled);
+void xen_arch_hvm_post_suspend(int suspend_cancelled);
 
 void xen_mm_pin_all(void);
 void xen_mm_unpin_all(void);
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 9033c1c..2782bac 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -282,8 +282,17 @@ EXPORT_SYMBOL(disable_irq);
 
 void __enable_irq(struct irq_desc *desc, unsigned int irq, bool resume)
 {
-        if (resume)
+        if (resume) {
+                if (!(desc->status & IRQ_SUSPENDED)) {
+                        if (!desc->action)
+                                return;
+                        if (!(desc->action->flags & IRQF_FORCE_RESUME))
+                                return;
+                        /* Pretend that it got disabled ! */
+                        desc->depth++;
+                }
                 desc->status &= ~IRQ_SUSPENDED;
+        }
 
         switch (desc->depth) {
         case 0:
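
IRQF_FORCE_RESUME covers interrupts that were never disabled on suspend, Xen event-channel IRQs being the motivating case, yet must still be run through the resume path; __enable_irq() fakes the missing disable by bumping depth so the bookkeeping balances. On the consumer side it is just a request flag. A hedged sketch, with the handler and names illustrative:

    static int setup_evtchn_irq(unsigned int irq, void *dev_id)
    {
            /* Ask resume_device_irqs() to re-enable this irq even though
             * the suspend path never marked it IRQ_SUSPENDED. */
            return request_irq(irq, evtchn_handler,
                               IRQF_PERCPU | IRQF_FORCE_RESUME,
                               "xen-evtchn", dev_id);
    }
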
diff --git a/kernel/irq/pm.c b/kernel/irq/pm.c
index 0d4005d8..d6bfb89 100644
--- a/kernel/irq/pm.c
+++ b/kernel/irq/pm.c
@@ -53,9 +53,6 @@ void resume_device_irqs(void)
         for_each_irq_desc(irq, desc) {
                 unsigned long flags;
 
-                if (!(desc->status & IRQ_SUSPENDED))
-                        continue;
-
                 raw_spin_lock_irqsave(&desc->lock, flags);
                 __enable_irq(desc, irq, true);
                 raw_spin_unlock_irqrestore(&desc->lock, flags);