kernel/xen.next-2.6.38.patch


From 1e13f505ecbc011465783283ebfa05a42f7ce18f Mon Sep 17 00:00:00 2001
From: Ian Campbell <ijc@hellion.org.uk>
Date: Thu, 3 Dec 2009 22:04:06 +0000
Subject: [PATCH 001/197] xen: export xen_gsi_from_irq, it is required by modular pciback
Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
Cc: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
drivers/xen/events.c | 1 +
1 files changed, 1 insertions(+), 0 deletions(-)
diff --git a/drivers/xen/events.c b/drivers/xen/events.c
index 97612f5..a04da4b 100644
--- a/drivers/xen/events.c
+++ b/drivers/xen/events.c
@@ -778,6 +778,7 @@ int xen_gsi_from_irq(unsigned irq)
{
return gsi_from_irq(irq);
}
+EXPORT_SYMBOL_GPL(xen_gsi_from_irq);
int xen_irq_from_pirq(unsigned pirq)
{
--
1.7.4
From ce101466403a469545056427c4ddaaf6843c700c Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Date: Wed, 1 Dec 2010 15:23:31 -0800
Subject: [PATCH 002/197] xen: drop all the special iomap pte paths.
Xen can work out when we're doing IO mappings for itself, so we don't
need to do anything special, and the extra tests just clog things up.
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
arch/x86/xen/mmu.c | 25 -------------------------
1 files changed, 0 insertions(+), 25 deletions(-)
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 44924e5..5ff42a6 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -607,11 +607,6 @@ static bool xen_page_pinned(void *ptr)
return PagePinned(page);
}
-static bool xen_iomap_pte(pte_t pte)
-{
- return pte_flags(pte) & _PAGE_IOMAP;
-}
-
void xen_set_domain_pte(pte_t *ptep, pte_t pteval, unsigned domid)
{
struct multicall_space mcs;
@@ -630,11 +625,6 @@ void xen_set_domain_pte(pte_t *ptep, pte_t pteval, unsigned domid)
}
EXPORT_SYMBOL_GPL(xen_set_domain_pte);
-static void xen_set_iomap_pte(pte_t *ptep, pte_t pteval)
-{
- xen_set_domain_pte(ptep, pteval, DOMID_IO);
-}
-
static void xen_extend_mmu_update(const struct mmu_update *update)
{
struct multicall_space mcs;
@@ -711,11 +701,6 @@ void set_pte_mfn(unsigned long vaddr, unsigned long mfn, pgprot_t flags)
void xen_set_pte_at(struct mm_struct *mm, unsigned long addr,
pte_t *ptep, pte_t pteval)
{
- if (xen_iomap_pte(pteval)) {
- xen_set_iomap_pte(ptep, pteval);
- goto out;
- }
-
ADD_STATS(set_pte_at, 1);
// ADD_STATS(set_pte_at_pinned, xen_page_pinned(ptep));
ADD_STATS(set_pte_at_current, mm == current->mm);
@@ -947,11 +932,6 @@ void xen_set_pud(pud_t *ptr, pud_t val)
void xen_set_pte(pte_t *ptep, pte_t pte)
{
- if (xen_iomap_pte(pte)) {
- xen_set_iomap_pte(ptep, pte);
- return;
- }
-
ADD_STATS(pte_update, 1);
// ADD_STATS(pte_update_pinned, xen_page_pinned(ptep));
ADD_STATS(pte_update_batched, paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU);
@@ -968,11 +948,6 @@ void xen_set_pte(pte_t *ptep, pte_t pte)
#ifdef CONFIG_X86_PAE
void xen_set_pte_atomic(pte_t *ptep, pte_t pte)
{
- if (xen_iomap_pte(pte)) {
- xen_set_iomap_pte(ptep, pte);
- return;
- }
-
set_64bit((u64 *)ptep, native_pte_val(pte));
}
--
1.7.4
From 09b56ed909733cec3caa326126451d44354f87e8 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Date: Wed, 1 Dec 2010 15:13:34 -0800
Subject: [PATCH 003/197] xen: use mmu_update for xen_set_pte_at()
In principle update_va_mapping is a good match for set_pte_at, since
it gets the address being mapped, which allows Xen to use its linear
pagetable mapping.
However that assumes that the pmd for the address is attached to the
current pagetable, which may not be true for a given user address space
because the kernel pmd is not shared (at least on 32-bit guests).
Normally the kernel will automatically sync a missing part of the
pagetable with the init_mm pagetable transparently via faults, but that
fails when a missing address is passed to Xen.
And while the linear pagetable mapping is very useful for 32-bit Xen
(as it avoids an explicit domain mapping), 32-bit Xen is deprecated.
64-bit Xen has all memory mapped all the time, so it makes no real
difference.
The upshot is that we should use mmu_update, since it can operate on
non-current pagetables or detached pagetables.
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
arch/x86/xen/mmu.c | 26 +++++++++++---------------
1 files changed, 11 insertions(+), 15 deletions(-)
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 5ff42a6..69ac75b 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -706,22 +706,18 @@ void xen_set_pte_at(struct mm_struct *mm, unsigned long addr,
ADD_STATS(set_pte_at_current, mm == current->mm);
ADD_STATS(set_pte_at_kernel, mm == &init_mm);
- if (mm == current->mm || mm == &init_mm) {
- if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU) {
- struct multicall_space mcs;
- mcs = xen_mc_entry(0);
-
- MULTI_update_va_mapping(mcs.mc, addr, pteval, 0);
- ADD_STATS(set_pte_at_batched, 1);
- xen_mc_issue(PARAVIRT_LAZY_MMU);
- goto out;
- } else
- if (HYPERVISOR_update_va_mapping(addr, pteval, 0) == 0)
- goto out;
- }
- xen_set_pte(ptep, pteval);
+ if(paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU) {
+ struct mmu_update u;
+
+ xen_mc_batch();
+
+ u.ptr = virt_to_machine(ptep).maddr | MMU_NORMAL_PT_UPDATE;
+ u.val = pte_val_ma(pteval);
+ xen_extend_mmu_update(&u);
-out: return;
+ xen_mc_issue(PARAVIRT_LAZY_MMU);
+ } else
+ native_set_pte(ptep, pteval);
}
pte_t xen_ptep_modify_prot_start(struct mm_struct *mm,
--
1.7.4
From 1dcbbb8bad80a25888e692686739de790a728bbc Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Date: Wed, 1 Dec 2010 15:30:41 -0800
Subject: [PATCH 004/197] xen: condense everything onto xen_set_pte
xen_set_pte_at and xen_pte_clear are essentially identical to
xen_set_pte, so just make them all common.
When batched, set_pte and pte_clear are the same, but the unbatched
operations must differ: they need to update the two halves of the pte
in different orders.
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
arch/x86/xen/mmu.c | 73 +++++++++++++++++++--------------------------------
1 files changed, 27 insertions(+), 46 deletions(-)
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 69ac75b..91632ff 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -108,12 +108,6 @@ static struct {
u32 prot_commit;
u32 prot_commit_batched;
-
- u32 set_pte_at;
- u32 set_pte_at_batched;
- u32 set_pte_at_pinned;
- u32 set_pte_at_current;
- u32 set_pte_at_kernel;
} mmu_stats;
static u8 zero_stats;
@@ -698,28 +692,39 @@ void set_pte_mfn(unsigned long vaddr, unsigned long mfn, pgprot_t flags)
set_pte_vaddr(vaddr, mfn_pte(mfn, flags));
}
-void xen_set_pte_at(struct mm_struct *mm, unsigned long addr,
- pte_t *ptep, pte_t pteval)
+static bool xen_batched_set_pte(pte_t *ptep, pte_t pteval)
{
- ADD_STATS(set_pte_at, 1);
-// ADD_STATS(set_pte_at_pinned, xen_page_pinned(ptep));
- ADD_STATS(set_pte_at_current, mm == current->mm);
- ADD_STATS(set_pte_at_kernel, mm == &init_mm);
+ struct mmu_update u;
- if(paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU) {
- struct mmu_update u;
+ if (paravirt_get_lazy_mode() != PARAVIRT_LAZY_MMU)
+ return false;
- xen_mc_batch();
+ xen_mc_batch();
+
+ u.ptr = virt_to_machine(ptep).maddr | MMU_NORMAL_PT_UPDATE;
+ u.val = pte_val_ma(pteval);
+ xen_extend_mmu_update(&u);
- u.ptr = virt_to_machine(ptep).maddr | MMU_NORMAL_PT_UPDATE;
- u.val = pte_val_ma(pteval);
- xen_extend_mmu_update(&u);
+ xen_mc_issue(PARAVIRT_LAZY_MMU);
+
+ return true;
+}
+
+void xen_set_pte(pte_t *ptep, pte_t pteval)
+{
+ ADD_STATS(pte_update, 1);
+// ADD_STATS(pte_update_pinned, xen_page_pinned(ptep));
- xen_mc_issue(PARAVIRT_LAZY_MMU);
- } else
+ if (!xen_batched_set_pte(ptep, pteval))
native_set_pte(ptep, pteval);
}
+void xen_set_pte_at(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep, pte_t pteval)
+{
+ xen_set_pte(ptep, pteval);
+}
+
pte_t xen_ptep_modify_prot_start(struct mm_struct *mm,
unsigned long addr, pte_t *ptep)
{
@@ -926,21 +931,6 @@ void xen_set_pud(pud_t *ptr, pud_t val)
xen_set_pud_hyper(ptr, val);
}
-void xen_set_pte(pte_t *ptep, pte_t pte)
-{
- ADD_STATS(pte_update, 1);
-// ADD_STATS(pte_update_pinned, xen_page_pinned(ptep));
- ADD_STATS(pte_update_batched, paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU);
-
-#ifdef CONFIG_X86_PAE
- ptep->pte_high = pte.pte_high;
- smp_wmb();
- ptep->pte_low = pte.pte_low;
-#else
- *ptep = pte;
-#endif
-}
-
#ifdef CONFIG_X86_PAE
void xen_set_pte_atomic(pte_t *ptep, pte_t pte)
{
@@ -949,9 +939,8 @@ void xen_set_pte_atomic(pte_t *ptep, pte_t pte)
void xen_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
{
- ptep->pte_low = 0;
- smp_wmb(); /* make sure low gets written first */
- ptep->pte_high = 0;
+ if (!xen_batched_set_pte(ptep, native_make_pte(0)))
+ native_pte_clear(mm, addr, ptep);
}
void xen_pmd_clear(pmd_t *pmdp)
@@ -2731,14 +2720,6 @@ static int __init xen_mmu_debugfs(void)
xen_debugfs_create_u32_array("mmu_update_histo", 0444, d_mmu_debug,
mmu_stats.mmu_update_histo, 20);
- debugfs_create_u32("set_pte_at", 0444, d_mmu_debug, &mmu_stats.set_pte_at);
- debugfs_create_u32("set_pte_at_batched", 0444, d_mmu_debug,
- &mmu_stats.set_pte_at_batched);
- debugfs_create_u32("set_pte_at_current", 0444, d_mmu_debug,
- &mmu_stats.set_pte_at_current);
- debugfs_create_u32("set_pte_at_kernel", 0444, d_mmu_debug,
- &mmu_stats.set_pte_at_kernel);
-
debugfs_create_u32("prot_commit", 0444, d_mmu_debug, &mmu_stats.prot_commit);
debugfs_create_u32("prot_commit_batched", 0444, d_mmu_debug,
&mmu_stats.prot_commit_batched);
--
1.7.4
From 3fcd3cdee1532d76da93f2ac2312ac20ffac0478 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Date: Wed, 1 Dec 2010 15:45:48 -0800
Subject: [PATCH 005/197] vmalloc: remove vmalloc_sync_all() from alloc_vm_area()
There's no need for it: it will get faulted into the current pagetable
as needed.
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
mm/vmalloc.c | 4 ----
1 files changed, 0 insertions(+), 4 deletions(-)
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index eb5cc7d..f4109f0 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -2096,10 +2096,6 @@ struct vm_struct *alloc_vm_area(size_t size)
return NULL;
}
- /* Make sure the pagetables are constructed in process kernel
- mappings */
- vmalloc_sync_all();
-
return area;
}
EXPORT_SYMBOL_GPL(alloc_vm_area);
--
1.7.4
From b47ecef7cb94691ce4c74df64adf1568fb5794a6 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Date: Wed, 1 Dec 2010 22:57:39 -0800
Subject: [PATCH 006/197] xen: make a pile of mmu pvop functions static
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
arch/x86/xen/mmu.c | 46 +++++++++++++++++++++++-----------------------
arch/x86/xen/mmu.h | 37 -------------------------------------
2 files changed, 23 insertions(+), 60 deletions(-)
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 91632ff..b38bfdc 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -647,7 +647,7 @@ static void xen_extend_mmu_update(const struct mmu_update *update)
*u = *update;
}
-void xen_set_pmd_hyper(pmd_t *ptr, pmd_t val)
+static void xen_set_pmd_hyper(pmd_t *ptr, pmd_t val)
{
struct mmu_update u;
@@ -667,7 +667,7 @@ void xen_set_pmd_hyper(pmd_t *ptr, pmd_t val)
preempt_enable();
}
-void xen_set_pmd(pmd_t *ptr, pmd_t val)
+static void xen_set_pmd(pmd_t *ptr, pmd_t val)
{
ADD_STATS(pmd_update, 1);
@@ -710,7 +710,7 @@ static bool xen_batched_set_pte(pte_t *ptep, pte_t pteval)
return true;
}
-void xen_set_pte(pte_t *ptep, pte_t pteval)
+static void xen_set_pte(pte_t *ptep, pte_t pteval)
{
ADD_STATS(pte_update, 1);
// ADD_STATS(pte_update_pinned, xen_page_pinned(ptep));
@@ -719,7 +719,7 @@ void xen_set_pte(pte_t *ptep, pte_t pteval)
native_set_pte(ptep, pteval);
}
-void xen_set_pte_at(struct mm_struct *mm, unsigned long addr,
+static void xen_set_pte_at(struct mm_struct *mm, unsigned long addr,
pte_t *ptep, pte_t pteval)
{
xen_set_pte(ptep, pteval);
@@ -799,7 +799,7 @@ static pteval_t iomap_pte(pteval_t val)
return val;
}
-pteval_t xen_pte_val(pte_t pte)
+static pteval_t xen_pte_val(pte_t pte)
{
pteval_t pteval = pte.pte;
@@ -816,7 +816,7 @@ pteval_t xen_pte_val(pte_t pte)
}
PV_CALLEE_SAVE_REGS_THUNK(xen_pte_val);
-pgdval_t xen_pgd_val(pgd_t pgd)
+static pgdval_t xen_pgd_val(pgd_t pgd)
{
return pte_mfn_to_pfn(pgd.pgd);
}
@@ -847,7 +847,7 @@ void xen_set_pat(u64 pat)
WARN_ON(pat != 0x0007010600070106ull);
}
-pte_t xen_make_pte(pteval_t pte)
+static pte_t xen_make_pte(pteval_t pte)
{
phys_addr_t addr = (pte & PTE_PFN_MASK);
@@ -882,20 +882,20 @@ pte_t xen_make_pte(pteval_t pte)
}
PV_CALLEE_SAVE_REGS_THUNK(xen_make_pte);
-pgd_t xen_make_pgd(pgdval_t pgd)
+static pgd_t xen_make_pgd(pgdval_t pgd)
{
pgd = pte_pfn_to_mfn(pgd);
return native_make_pgd(pgd);
}
PV_CALLEE_SAVE_REGS_THUNK(xen_make_pgd);
-pmdval_t xen_pmd_val(pmd_t pmd)
+static pmdval_t xen_pmd_val(pmd_t pmd)
{
return pte_mfn_to_pfn(pmd.pmd);
}
PV_CALLEE_SAVE_REGS_THUNK(xen_pmd_val);
-void xen_set_pud_hyper(pud_t *ptr, pud_t val)
+static void xen_set_pud_hyper(pud_t *ptr, pud_t val)
{
struct mmu_update u;
@@ -915,7 +915,7 @@ void xen_set_pud_hyper(pud_t *ptr, pud_t val)
preempt_enable();
}
-void xen_set_pud(pud_t *ptr, pud_t val)
+static void xen_set_pud(pud_t *ptr, pud_t val)
{
ADD_STATS(pud_update, 1);
@@ -932,24 +932,24 @@ void xen_set_pud(pud_t *ptr, pud_t val)
}
#ifdef CONFIG_X86_PAE
-void xen_set_pte_atomic(pte_t *ptep, pte_t pte)
+static void xen_set_pte_atomic(pte_t *ptep, pte_t pte)
{
set_64bit((u64 *)ptep, native_pte_val(pte));
}
-void xen_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
+static void xen_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
{
if (!xen_batched_set_pte(ptep, native_make_pte(0)))
native_pte_clear(mm, addr, ptep);
}
-void xen_pmd_clear(pmd_t *pmdp)
+static void xen_pmd_clear(pmd_t *pmdp)
{
set_pmd(pmdp, __pmd(0));
}
#endif /* CONFIG_X86_PAE */
-pmd_t xen_make_pmd(pmdval_t pmd)
+static pmd_t xen_make_pmd(pmdval_t pmd)
{
pmd = pte_pfn_to_mfn(pmd);
return native_make_pmd(pmd);
@@ -957,13 +957,13 @@ pmd_t xen_make_pmd(pmdval_t pmd)
PV_CALLEE_SAVE_REGS_THUNK(xen_make_pmd);
#if PAGETABLE_LEVELS == 4
-pudval_t xen_pud_val(pud_t pud)
+static pudval_t xen_pud_val(pud_t pud)
{
return pte_mfn_to_pfn(pud.pud);
}
PV_CALLEE_SAVE_REGS_THUNK(xen_pud_val);
-pud_t xen_make_pud(pudval_t pud)
+static pud_t xen_make_pud(pudval_t pud)
{
pud = pte_pfn_to_mfn(pud);
@@ -971,7 +971,7 @@ pud_t xen_make_pud(pudval_t pud)
}
PV_CALLEE_SAVE_REGS_THUNK(xen_make_pud);
-pgd_t *xen_get_user_pgd(pgd_t *pgd)
+static pgd_t *xen_get_user_pgd(pgd_t *pgd)
{
pgd_t *pgd_page = (pgd_t *)(((unsigned long)pgd) & PAGE_MASK);
unsigned offset = pgd - pgd_page;
@@ -1003,7 +1003,7 @@ static void __xen_set_pgd_hyper(pgd_t *ptr, pgd_t val)
* 2. It is always pinned
* 3. It has no user pagetable attached to it
*/
-void __init xen_set_pgd_hyper(pgd_t *ptr, pgd_t val)
+static void __init xen_set_pgd_hyper(pgd_t *ptr, pgd_t val)
{
preempt_disable();
@@ -1016,7 +1016,7 @@ void __init xen_set_pgd_hyper(pgd_t *ptr, pgd_t val)
preempt_enable();
}
-void xen_set_pgd(pgd_t *ptr, pgd_t val)
+static void xen_set_pgd(pgd_t *ptr, pgd_t val)
{
pgd_t *user_ptr = xen_get_user_pgd(ptr);
@@ -1439,14 +1439,14 @@ void xen_mm_unpin_all(void)
spin_unlock_irqrestore(&pgd_lock, flags);
}
-void xen_activate_mm(struct mm_struct *prev, struct mm_struct *next)
+static void xen_activate_mm(struct mm_struct *prev, struct mm_struct *next)
{
spin_lock(&next->page_table_lock);
xen_pgd_pin(next);
spin_unlock(&next->page_table_lock);
}
-void xen_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm)
+static void xen_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm)
{
spin_lock(&mm->page_table_lock);
xen_pgd_pin(mm);
@@ -1533,7 +1533,7 @@ static void xen_drop_mm_ref(struct mm_struct *mm)
* pagetable because of lazy tlb flushing. This means we need need to
* switch all CPUs off this pagetable before we can unpin it.
*/
-void xen_exit_mmap(struct mm_struct *mm)
+static void xen_exit_mmap(struct mm_struct *mm)
{
get_cpu(); /* make sure we don't move around */
xen_drop_mm_ref(mm);
diff --git a/arch/x86/xen/mmu.h b/arch/x86/xen/mmu.h
index 537bb9a..73809bb 100644
--- a/arch/x86/xen/mmu.h
+++ b/arch/x86/xen/mmu.h
@@ -15,43 +15,6 @@ bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn);
void set_pte_mfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags);
-
-void xen_activate_mm(struct mm_struct *prev, struct mm_struct *next);
-void xen_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm);
-void xen_exit_mmap(struct mm_struct *mm);
-
-pteval_t xen_pte_val(pte_t);
-pmdval_t xen_pmd_val(pmd_t);
-pgdval_t xen_pgd_val(pgd_t);
-
-pte_t xen_make_pte(pteval_t);
-pmd_t xen_make_pmd(pmdval_t);
-pgd_t xen_make_pgd(pgdval_t);
-
-void xen_set_pte(pte_t *ptep, pte_t pteval);
-void xen_set_pte_at(struct mm_struct *mm, unsigned long addr,
- pte_t *ptep, pte_t pteval);
-
-#ifdef CONFIG_X86_PAE
-void xen_set_pte_atomic(pte_t *ptep, pte_t pte);
-void xen_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep);
-void xen_pmd_clear(pmd_t *pmdp);
-#endif /* CONFIG_X86_PAE */
-
-void xen_set_pmd(pmd_t *pmdp, pmd_t pmdval);
-void xen_set_pud(pud_t *ptr, pud_t val);
-void xen_set_pmd_hyper(pmd_t *pmdp, pmd_t pmdval);
-void xen_set_pud_hyper(pud_t *ptr, pud_t val);
-
-#if PAGETABLE_LEVELS == 4
-pudval_t xen_pud_val(pud_t pud);
-pud_t xen_make_pud(pudval_t pudval);
-void xen_set_pgd(pgd_t *pgdp, pgd_t pgd);
-void xen_set_pgd_hyper(pgd_t *pgdp, pgd_t pgd);
-#endif
-
-pgd_t *xen_get_user_pgd(pgd_t *pgd);
-
pte_t xen_ptep_modify_prot_start(struct mm_struct *mm, unsigned long addr, pte_t *ptep);
void xen_ptep_modify_prot_commit(struct mm_struct *mm, unsigned long addr,
pte_t *ptep, pte_t pte);
--
1.7.4
From f0885b9401a859bc7bed849925a703c03d00119b Mon Sep 17 00:00:00 2001
From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Date: Mon, 8 Nov 2010 14:13:35 -0500
Subject: [PATCH 007/197] xen/pci: Add xen_[find|register|unregister]_device_domain_owner functions.
The Xen PCI backend performs ownership (MSI/MSI-X) changes on behalf of
the guest. This means we need some mechanism to find, set and unset
the domain id of the guest.
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
arch/x86/include/asm/xen/pci.h | 16 +++++++++
arch/x86/pci/xen.c | 73 ++++++++++++++++++++++++++++++++++++++++
2 files changed, 89 insertions(+), 0 deletions(-)
diff --git a/arch/x86/include/asm/xen/pci.h b/arch/x86/include/asm/xen/pci.h
index 2329b3e..8474b4b 100644
--- a/arch/x86/include/asm/xen/pci.h
+++ b/arch/x86/include/asm/xen/pci.h
@@ -15,10 +15,26 @@ static inline int pci_xen_hvm_init(void)
#endif
#if defined(CONFIG_XEN_DOM0)
void __init xen_setup_pirqs(void);
+int xen_find_device_domain_owner(struct pci_dev *dev);
+int xen_register_device_domain_owner(struct pci_dev *dev, uint16_t domain);
+int xen_unregister_device_domain_owner(struct pci_dev *dev);
#else
static inline void __init xen_setup_pirqs(void)
{
}
+static inline int xen_find_device_domain_owner(struct pci_dev *dev)
+{
+ return -1;
+}
+static inline int xen_register_device_domain_owner(struct pci_dev *dev,
+ uint16_t domain)
+{
+ return -1;
+}
+static inline int xen_unregister_device_domain_owner(struct pci_dev *dev)
+{
+ return -1;
+}
#endif
#if defined(CONFIG_PCI_MSI)
diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c
index 117f5b8..6d2a986 100644
--- a/arch/x86/pci/xen.c
+++ b/arch/x86/pci/xen.c
@@ -412,3 +412,76 @@ void __init xen_setup_pirqs(void)
}
}
#endif
+
+struct xen_device_domain_owner {
+ domid_t domain;
+ struct pci_dev *dev;
+ struct list_head list;
+};
+
+static DEFINE_SPINLOCK(dev_domain_list_spinlock);
+static struct list_head dev_domain_list = LIST_HEAD_INIT(dev_domain_list);
+
+static struct xen_device_domain_owner *find_device(struct pci_dev *dev)
+{
+ struct xen_device_domain_owner *owner;
+
+ list_for_each_entry(owner, &dev_domain_list, list) {
+ if (owner->dev == dev)
+ return owner;
+ }
+ return NULL;
+}
+
+int xen_find_device_domain_owner(struct pci_dev *dev)
+{
+ struct xen_device_domain_owner *owner;
+ int domain = -ENODEV;
+
+ spin_lock(&dev_domain_list_spinlock);
+ owner = find_device(dev);
+ if (owner)
+ domain = owner->domain;
+ spin_unlock(&dev_domain_list_spinlock);
+ return domain;
+}
+EXPORT_SYMBOL(xen_find_device_domain_owner);
+
+int xen_register_device_domain_owner(struct pci_dev *dev, uint16_t domain)
+{
+ struct xen_device_domain_owner *owner;
+
+ owner = kzalloc(sizeof(struct xen_device_domain_owner), GFP_KERNEL);
+ if (!owner)
+ return -ENODEV;
+
+ spin_lock(&dev_domain_list_spinlock);
+ if (find_device(dev)) {
+ spin_unlock(&dev_domain_list_spinlock);
+ kfree(owner);
+ return -EEXIST;
+ }
+ owner->domain = domain;
+ owner->dev = dev;
+ list_add_tail(&owner->list, &dev_domain_list);
+ spin_unlock(&dev_domain_list_spinlock);
+ return 0;
+}
+EXPORT_SYMBOL(xen_register_device_domain_owner);
+
+int xen_unregister_device_domain_owner(struct pci_dev *dev)
+{
+ struct xen_device_domain_owner *owner;
+
+ spin_lock(&dev_domain_list_spinlock);
+ owner = find_device(dev);
+ if (!owner) {
+ spin_unlock(&dev_domain_list_spinlock);
+ return -ENODEV;
+ }
+ list_del(&owner->list);
+ spin_unlock(&dev_domain_list_spinlock);
+ kfree(owner);
+ return 0;
+}
+EXPORT_SYMBOL(xen_unregister_device_domain_owner);
--
1.7.4
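[Editorial note, not part of the patch series: the sketch below illustrates how the
device-domain-owner interface added in the patch above might be used by a caller.
Only xen_register_device_domain_owner(), xen_find_device_domain_owner() and
xen_unregister_device_domain_owner() come from the patch; the surrounding driver
functions and the example domid are hypothetical.]

/* Hypothetical sketch: recording which guest domain owns a PCI device. */
#include <linux/pci.h>
#include <asm/xen/pci.h>

static int example_assign_device(struct pci_dev *dev, uint16_t guest_domid)
{
	int err;

	/* Record that guest_domid owns this device; returns -EEXIST if an
	 * owner is already registered for it. */
	err = xen_register_device_domain_owner(dev, guest_domid);
	if (err)
		return err;

	/* MSI setup paths can later look the owner up; a negative return
	 * means no owner is registered and DOMID_SELF should be assumed. */
	if (xen_find_device_domain_owner(dev) < 0)
		dev_warn(&dev->dev, "no domain owner registered\n");

	return 0;
}

static void example_release_device(struct pci_dev *dev)
{
	/* Drop the ownership record when the device is handed back. */
	xen_unregister_device_domain_owner(dev);
}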
From da24916fdf04d7b4a32c5b9d2c09e47775496e1d Mon Sep 17 00:00:00 2001
From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Date: Mon, 8 Nov 2010 14:23:17 -0500
Subject: [PATCH 008/197] xen: Check if the PCI device is owned by a domain different than DOMID_SELF.
We check if there is a domain owner for the PCI device. In case of failure
(meaning no domain has registered for this device) we make
DOMID_SELF the owner.
[v2: deal with rebasing on v2.6.37-1]
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Acked-by: Xiantao Zhang <xiantao.zhang@intel.com>
---
drivers/xen/events.c | 16 +++++++++++++---
1 files changed, 13 insertions(+), 3 deletions(-)
diff --git a/drivers/xen/events.c b/drivers/xen/events.c
index a04da4b..96c93e7 100644
--- a/drivers/xen/events.c
+++ b/drivers/xen/events.c
@@ -40,6 +40,7 @@
#include <asm/xen/pci.h>
#include <asm/xen/hypercall.h>
#include <asm/xen/hypervisor.h>
+#include <asm/xen/pci.h>
#include <xen/xen.h>
#include <xen/hvm.h>
@@ -97,6 +98,7 @@ struct irq_info
unsigned short gsi;
unsigned char vector;
unsigned char flags;
+ uint16_t domid;
} pirq;
} u;
};
@@ -158,7 +160,8 @@ static struct irq_info mk_pirq_info(unsigned short evtchn, unsigned short pirq,
{
return (struct irq_info) { .type = IRQT_PIRQ, .evtchn = evtchn,
.cpu = 0,
- .u.pirq = { .pirq = pirq, .gsi = gsi, .vector = vector } };
+ .u.pirq = { .pirq = pirq, .gsi = gsi,
+ .vector = vector, .domid = DOMID_SELF } };
}
/*
@@ -688,11 +691,16 @@ int xen_create_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int type)
int irq = -1;
struct physdev_map_pirq map_irq;
int rc;
+ domid_t domid;
int pos;
u32 table_offset, bir;
+ domid = rc = xen_find_device_domain_owner(dev);
+ if (rc < 0)
+ domid = DOMID_SELF;
+
memset(&map_irq, 0, sizeof(map_irq));
- map_irq.domid = DOMID_SELF;
+ map_irq.domid = domid;
map_irq.type = MAP_PIRQ_TYPE_MSI;
map_irq.index = -1;
map_irq.pirq = -1;
@@ -727,6 +735,8 @@ int xen_create_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int type)
goto out;
}
irq_info[irq] = mk_pirq_info(0, map_irq.pirq, 0, map_irq.index);
+ if (domid)
+ irq_info[irq].u.pirq.domid = domid;
set_irq_chip_and_handler_name(irq, &xen_pirq_chip,
handle_level_irq,
@@ -753,7 +763,7 @@ int xen_destroy_irq(int irq)
if (xen_initial_domain()) {
unmap_irq.pirq = info->u.pirq.pirq;
- unmap_irq.domid = DOMID_SELF;
+ unmap_irq.domid = info->u.pirq.domid;
rc = HYPERVISOR_physdev_op(PHYSDEVOP_unmap_pirq, &unmap_irq);
if (rc) {
printk(KERN_WARNING "unmap irq failed %d\n", rc);
--
1.7.4
From 30fecb8166bdd163bdaab795b573cf988f60fbbe Mon Sep 17 00:00:00 2001
From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Date: Mon, 8 Nov 2010 14:26:36 -0500
Subject: [PATCH 009/197] xen: Add support to check if IRQ line is shared with other domains.
We do this via the PHYSDEVOP_irq_status_query hypercall.
We will get a positive value if another domain has bound its
PIRQ to the specified IRQ line.
[v2: Deal with v2.6.37-rc1 rebase fallout]
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
drivers/xen/events.c | 13 +++++++++++++
include/xen/events.h | 3 +++
2 files changed, 16 insertions(+), 0 deletions(-)
diff --git a/drivers/xen/events.c b/drivers/xen/events.c
index 96c93e7..690dfad 100644
--- a/drivers/xen/events.c
+++ b/drivers/xen/events.c
@@ -1398,6 +1398,19 @@ void xen_poll_irq(int irq)
xen_poll_irq_timeout(irq, 0 /* no timeout */);
}
+/* Check whether the IRQ line is shared with other guests. */
+int xen_ignore_irq(int irq)
+{
+ struct irq_info *info = info_for_irq(irq);
+ struct physdev_irq_status_query irq_status = { .irq =
+ info->u.pirq.gsi };
+
+ if (HYPERVISOR_physdev_op(PHYSDEVOP_irq_status_query, &irq_status))
+ return 0;
+ return !(irq_status.flags & XENIRQSTAT_shared);
+}
+EXPORT_SYMBOL_GPL(xen_ignore_irq);
+
void xen_irq_resume(void)
{
unsigned int cpu, irq, evtchn;
diff --git a/include/xen/events.h b/include/xen/events.h
index 646dd17..553c664 100644
--- a/include/xen/events.h
+++ b/include/xen/events.h
@@ -89,4 +89,7 @@ int xen_vector_from_irq(unsigned pirq);
/* Return irq from pirq */
int xen_irq_from_pirq(unsigned pirq);
+/* Determine whether to ignore this IRQ if passed to a guest. */
+int xen_ignore_irq(int irq);
+
#endif /* _XEN_EVENTS_H */
--
1.7.4
From 909e45104de4414897cefce2f6bbed07fc4de4b3 Mon Sep 17 00:00:00 2001
From: Ian Campbell <ian.campbell@citrix.com>
Date: Mon, 9 Feb 2009 12:05:50 -0800
Subject: [PATCH 010/197] xen: implement bind_interdomain_evtchn_to_irqhandler for backend drivers
Impact: new Xen-internal API
Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
drivers/xen/events.c | 38 ++++++++++++++++++++++++++++++++++++++
include/xen/events.h | 6 ++++++
2 files changed, 44 insertions(+), 0 deletions(-)
diff --git a/drivers/xen/events.c b/drivers/xen/events.c
index 690dfad..95eea13 100644
--- a/drivers/xen/events.c
+++ b/drivers/xen/events.c
@@ -849,6 +849,21 @@ static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu)
return irq;
}
+static int bind_interdomain_evtchn_to_irq(unsigned int remote_domain,
+ unsigned int remote_port)
+{
+ struct evtchn_bind_interdomain bind_interdomain;
+ int err;
+
+ bind_interdomain.remote_dom = remote_domain;
+ bind_interdomain.remote_port = remote_port;
+
+ err = HYPERVISOR_event_channel_op(EVTCHNOP_bind_interdomain,
+ &bind_interdomain);
+
+ return err ? : bind_evtchn_to_irq(bind_interdomain.local_port);
+}
+
int bind_virq_to_irq(unsigned int virq, unsigned int cpu)
{
@@ -944,6 +959,29 @@ int bind_evtchn_to_irqhandler(unsigned int evtchn,
}
EXPORT_SYMBOL_GPL(bind_evtchn_to_irqhandler);
+int bind_interdomain_evtchn_to_irqhandler(unsigned int remote_domain,
+ unsigned int remote_port,
+ irq_handler_t handler,
+ unsigned long irqflags,
+ const char *devname,
+ void *dev_id)
+{
+ int irq, retval;
+
+ irq = bind_interdomain_evtchn_to_irq(remote_domain, remote_port);
+ if (irq < 0)
+ return irq;
+
+ retval = request_irq(irq, handler, irqflags, devname, dev_id);
+ if (retval != 0) {
+ unbind_from_irq(irq);
+ return retval;
+ }
+
+ return irq;
+}
+EXPORT_SYMBOL_GPL(bind_interdomain_evtchn_to_irqhandler);
+
int bind_virq_to_irqhandler(unsigned int virq, unsigned int cpu,
irq_handler_t handler,
unsigned long irqflags, const char *devname, void *dev_id)
diff --git a/include/xen/events.h b/include/xen/events.h
index 553c664..2fe1644 100644
--- a/include/xen/events.h
+++ b/include/xen/events.h
@@ -23,6 +23,12 @@ int bind_ipi_to_irqhandler(enum ipi_vector ipi,
unsigned long irqflags,
const char *devname,
void *dev_id);
+int bind_interdomain_evtchn_to_irqhandler(unsigned int remote_domain,
+ unsigned int remote_port,
+ irq_handler_t handler,
+ unsigned long irqflags,
+ const char *devname,
+ void *dev_id);
/*
* Common unbind function for all event sources. Takes IRQ to unbind from.
--
1.7.4
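[Editorial note, not part of the patch series: the sketch below shows how a backend
driver might use the interdomain binding helper added in the patch above. Only
bind_interdomain_evtchn_to_irqhandler() and unbind_from_irqhandler() are real kernel
interfaces here; the handler, the connect function and its arguments are hypothetical.]

/* Hypothetical sketch: a backend binding to an event channel offered by a
 * frontend domain. */
#include <linux/interrupt.h>
#include <xen/events.h>

static irqreturn_t example_backend_interrupt(int irq, void *dev_id)
{
	/* The frontend kicked us; process the shared ring here. */
	return IRQ_HANDLED;
}

static int example_backend_connect(unsigned int frontend_domid,
				   unsigned int remote_port, void *priv)
{
	int irq;

	/* Binds the remote domain's event channel and requests a local IRQ
	 * for it in one step; returns the IRQ number or a negative error. */
	irq = bind_interdomain_evtchn_to_irqhandler(frontend_domid,
						    remote_port,
						    example_backend_interrupt,
						    0, "example-backend", priv);
	if (irq < 0)
		return irq;

	/* ... store irq; tear down later with unbind_from_irqhandler(irq, priv). */
	return 0;
}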
From b4f664c8de09ab8537e1cd194df29056f803062e Mon Sep 17 00:00:00 2001
From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Date: Mon, 8 Nov 2010 14:46:33 -0500
Subject: [PATCH 011/197] pci/xen: Make xen_[find|register|unregister]_domain_owner be _GPL
EXPORT_SYMBOL -> EXPORT_SYMBOL_GPL.
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
arch/x86/pci/xen.c | 6 +++---
1 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c
index 6d2a986..0fa23c8 100644
--- a/arch/x86/pci/xen.c
+++ b/arch/x86/pci/xen.c
@@ -445,7 +445,7 @@ int xen_find_device_domain_owner(struct pci_dev *dev)
spin_unlock(&dev_domain_list_spinlock);
return domain;
}
-EXPORT_SYMBOL(xen_find_device_domain_owner);
+EXPORT_SYMBOL_GPL(xen_find_device_domain_owner);
int xen_register_device_domain_owner(struct pci_dev *dev, uint16_t domain)
{
@@ -467,7 +467,7 @@ int xen_register_device_domain_owner(struct pci_dev *dev, uint16_t domain)
spin_unlock(&dev_domain_list_spinlock);
return 0;
}
-EXPORT_SYMBOL(xen_register_device_domain_owner);
+EXPORT_SYMBOL_GPL(xen_register_device_domain_owner);
int xen_unregister_device_domain_owner(struct pci_dev *dev)
{
@@ -484,4 +484,4 @@ int xen_unregister_device_domain_owner(struct pci_dev *dev)
kfree(owner);
return 0;
}
-EXPORT_SYMBOL(xen_unregister_device_domain_owner);
+EXPORT_SYMBOL_GPL(xen_unregister_device_domain_owner);
--
1.7.4
From 443b2aafbdb509f218fcb8f4665f063e3a5e1a92 Mon Sep 17 00:00:00 2001
From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Date: Tue, 13 Oct 2009 17:22:20 -0400
Subject: [PATCH 012/197] xen-pciback: Initial copy from linux-2.6.18.hg off pciback driver.
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
drivers/xen/pciback/Makefile | 17 +
drivers/xen/pciback/conf_space.c | 435 ++++++++
drivers/xen/pciback/conf_space.h | 126 +++
drivers/xen/pciback/conf_space_capability.c | 69 ++
drivers/xen/pciback/conf_space_capability.h | 23 +
drivers/xen/pciback/conf_space_capability_msi.c | 79 ++
drivers/xen/pciback/conf_space_capability_pm.c | 126 +++
drivers/xen/pciback/conf_space_capability_vpd.c | 40 +
drivers/xen/pciback/conf_space_header.c | 317 ++++++
drivers/xen/pciback/conf_space_quirks.c | 138 +++
drivers/xen/pciback/conf_space_quirks.h | 35 +
drivers/xen/pciback/controller.c | 443 ++++++++
drivers/xen/pciback/passthrough.c | 176 +++
drivers/xen/pciback/pci_stub.c | 1316 +++++++++++++++++++++++
drivers/xen/pciback/pciback.h | 126 +++
drivers/xen/pciback/pciback_ops.c | 134 +++
drivers/xen/pciback/slot.c | 187 ++++
drivers/xen/pciback/vpci.c | 242 +++++
drivers/xen/pciback/xenbus.c | 710 ++++++++++++
19 files changed, 4739 insertions(+), 0 deletions(-)
create mode 100644 drivers/xen/pciback/Makefile
create mode 100644 drivers/xen/pciback/conf_space.c
create mode 100644 drivers/xen/pciback/conf_space.h
create mode 100644 drivers/xen/pciback/conf_space_capability.c
create mode 100644 drivers/xen/pciback/conf_space_capability.h
create mode 100644 drivers/xen/pciback/conf_space_capability_msi.c
create mode 100644 drivers/xen/pciback/conf_space_capability_pm.c
create mode 100644 drivers/xen/pciback/conf_space_capability_vpd.c
create mode 100644 drivers/xen/pciback/conf_space_header.c
create mode 100644 drivers/xen/pciback/conf_space_quirks.c
create mode 100644 drivers/xen/pciback/conf_space_quirks.h
create mode 100644 drivers/xen/pciback/controller.c
create mode 100644 drivers/xen/pciback/passthrough.c
create mode 100644 drivers/xen/pciback/pci_stub.c
create mode 100644 drivers/xen/pciback/pciback.h
create mode 100644 drivers/xen/pciback/pciback_ops.c
create mode 100644 drivers/xen/pciback/slot.c
create mode 100644 drivers/xen/pciback/vpci.c
create mode 100644 drivers/xen/pciback/xenbus.c
diff --git a/drivers/xen/pciback/Makefile b/drivers/xen/pciback/Makefile
new file mode 100644
index 0000000..106dae7
--- /dev/null
+++ b/drivers/xen/pciback/Makefile
@@ -0,0 +1,17 @@
+obj-$(CONFIG_XEN_PCIDEV_BACKEND) += pciback.o
+
+pciback-y := pci_stub.o pciback_ops.o xenbus.o
+pciback-y += conf_space.o conf_space_header.o \
+ conf_space_capability.o \
+ conf_space_capability_vpd.o \
+ conf_space_capability_pm.o \
+ conf_space_quirks.o
+pciback-$(CONFIG_PCI_MSI) += conf_space_capability_msi.o
+pciback-$(CONFIG_XEN_PCIDEV_BACKEND_VPCI) += vpci.o
+pciback-$(CONFIG_XEN_PCIDEV_BACKEND_SLOT) += slot.o
+pciback-$(CONFIG_XEN_PCIDEV_BACKEND_PASS) += passthrough.o
+pciback-$(CONFIG_XEN_PCIDEV_BACKEND_CONTROLLER) += controller.o
+
+ifeq ($(CONFIG_XEN_PCIDEV_BE_DEBUG),y)
+EXTRA_CFLAGS += -DDEBUG
+endif
diff --git a/drivers/xen/pciback/conf_space.c b/drivers/xen/pciback/conf_space.c
new file mode 100644
index 0000000..0c76db1
--- /dev/null
+++ b/drivers/xen/pciback/conf_space.c
@@ -0,0 +1,435 @@
+/*
+ * PCI Backend - Functions for creating a virtual configuration space for
+ * exported PCI Devices.
+ * It's dangerous to allow PCI Driver Domains to change their
+ * device's resources (memory, i/o ports, interrupts). We need to
+ * restrict changes to certain PCI Configuration registers:
+ * BARs, INTERRUPT_PIN, most registers in the header...
+ *
+ * Author: Ryan Wilson <hap9@epoch.ncsc.mil>
+ */
+
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include "pciback.h"
+#include "conf_space.h"
+#include "conf_space_quirks.h"
+
+static int permissive;
+module_param(permissive, bool, 0644);
+
+#define DEFINE_PCI_CONFIG(op,size,type) \
+int pciback_##op##_config_##size \
+(struct pci_dev *dev, int offset, type value, void *data) \
+{ \
+ return pci_##op##_config_##size (dev, offset, value); \
+}
+
+DEFINE_PCI_CONFIG(read, byte, u8 *)
+DEFINE_PCI_CONFIG(read, word, u16 *)
+DEFINE_PCI_CONFIG(read, dword, u32 *)
+
+DEFINE_PCI_CONFIG(write, byte, u8)
+DEFINE_PCI_CONFIG(write, word, u16)
+DEFINE_PCI_CONFIG(write, dword, u32)
+
+static int conf_space_read(struct pci_dev *dev,
+ const struct config_field_entry *entry,
+ int offset, u32 *value)
+{
+ int ret = 0;
+ const struct config_field *field = entry->field;
+
+ *value = 0;
+
+ switch (field->size) {
+ case 1:
+ if (field->u.b.read)
+ ret = field->u.b.read(dev, offset, (u8 *) value,
+ entry->data);
+ break;
+ case 2:
+ if (field->u.w.read)
+ ret = field->u.w.read(dev, offset, (u16 *) value,
+ entry->data);
+ break;
+ case 4:
+ if (field->u.dw.read)
+ ret = field->u.dw.read(dev, offset, value, entry->data);
+ break;
+ }
+ return ret;
+}
+
+static int conf_space_write(struct pci_dev *dev,
+ const struct config_field_entry *entry,
+ int offset, u32 value)
+{
+ int ret = 0;
+ const struct config_field *field = entry->field;
+
+ switch (field->size) {
+ case 1:
+ if (field->u.b.write)
+ ret = field->u.b.write(dev, offset, (u8) value,
+ entry->data);
+ break;
+ case 2:
+ if (field->u.w.write)
+ ret = field->u.w.write(dev, offset, (u16) value,
+ entry->data);
+ break;
+ case 4:
+ if (field->u.dw.write)
+ ret = field->u.dw.write(dev, offset, value,
+ entry->data);
+ break;
+ }
+ return ret;
+}
+
+static inline u32 get_mask(int size)
+{
+ if (size == 1)
+ return 0xff;
+ else if (size == 2)
+ return 0xffff;
+ else
+ return 0xffffffff;
+}
+
+static inline int valid_request(int offset, int size)
+{
+ /* Validate request (no un-aligned requests) */
+ if ((size == 1 || size == 2 || size == 4) && (offset % size) == 0)
+ return 1;
+ return 0;
+}
+
+static inline u32 merge_value(u32 val, u32 new_val, u32 new_val_mask,
+ int offset)
+{
+ if (offset >= 0) {
+ new_val_mask <<= (offset * 8);
+ new_val <<= (offset * 8);
+ } else {
+ new_val_mask >>= (offset * -8);
+ new_val >>= (offset * -8);
+ }
+ val = (val & ~new_val_mask) | (new_val & new_val_mask);
+
+ return val;
+}
+
+static int pcibios_err_to_errno(int err)
+{
+ switch (err) {
+ case PCIBIOS_SUCCESSFUL:
+ return XEN_PCI_ERR_success;
+ case PCIBIOS_DEVICE_NOT_FOUND:
+ return XEN_PCI_ERR_dev_not_found;
+ case PCIBIOS_BAD_REGISTER_NUMBER:
+ return XEN_PCI_ERR_invalid_offset;
+ case PCIBIOS_FUNC_NOT_SUPPORTED:
+ return XEN_PCI_ERR_not_implemented;
+ case PCIBIOS_SET_FAILED:
+ return XEN_PCI_ERR_access_denied;
+ }
+ return err;
+}
+
+int pciback_config_read(struct pci_dev *dev, int offset, int size,
+ u32 * ret_val)
+{
+ int err = 0;
+ struct pciback_dev_data *dev_data = pci_get_drvdata(dev);
+ const struct config_field_entry *cfg_entry;
+ const struct config_field *field;
+ int req_start, req_end, field_start, field_end;
+ /* if read fails for any reason, return 0 (as if device didn't respond) */
+ u32 value = 0, tmp_val;
+
+ if (unlikely(verbose_request))
+ printk(KERN_DEBUG "pciback: %s: read %d bytes at 0x%x\n",
+ pci_name(dev), size, offset);
+
+ if (!valid_request(offset, size)) {
+ err = XEN_PCI_ERR_invalid_offset;
+ goto out;
+ }
+
+ /* Get the real value first, then modify as appropriate */
+ switch (size) {
+ case 1:
+ err = pci_read_config_byte(dev, offset, (u8 *) & value);
+ break;
+ case 2:
+ err = pci_read_config_word(dev, offset, (u16 *) & value);
+ break;
+ case 4:
+ err = pci_read_config_dword(dev, offset, &value);
+ break;
+ }
+
+ list_for_each_entry(cfg_entry, &dev_data->config_fields, list) {
+ field = cfg_entry->field;
+
+ req_start = offset;
+ req_end = offset + size;
+ field_start = OFFSET(cfg_entry);
+ field_end = OFFSET(cfg_entry) + field->size;
+
+ if ((req_start >= field_start && req_start < field_end)
+ || (req_end > field_start && req_end <= field_end)) {
+ err = conf_space_read(dev, cfg_entry, field_start,
+ &tmp_val);
+ if (err)
+ goto out;
+
+ value = merge_value(value, tmp_val,
+ get_mask(field->size),
+ field_start - req_start);
+ }
+ }
+
+ out:
+ if (unlikely(verbose_request))
+ printk(KERN_DEBUG "pciback: %s: read %d bytes at 0x%x = %x\n",
+ pci_name(dev), size, offset, value);
+
+ *ret_val = value;
+ return pcibios_err_to_errno(err);
+}
+
+int pciback_config_write(struct pci_dev *dev, int offset, int size, u32 value)
+{
+ int err = 0, handled = 0;
+ struct pciback_dev_data *dev_data = pci_get_drvdata(dev);
+ const struct config_field_entry *cfg_entry;
+ const struct config_field *field;
+ u32 tmp_val;
+ int req_start, req_end, field_start, field_end;
+
+ if (unlikely(verbose_request))
+ printk(KERN_DEBUG
+ "pciback: %s: write request %d bytes at 0x%x = %x\n",
+ pci_name(dev), size, offset, value);
+
+ if (!valid_request(offset, size))
+ return XEN_PCI_ERR_invalid_offset;
+
+ list_for_each_entry(cfg_entry, &dev_data->config_fields, list) {
+ field = cfg_entry->field;
+
+ req_start = offset;
+ req_end = offset + size;
+ field_start = OFFSET(cfg_entry);
+ field_end = OFFSET(cfg_entry) + field->size;
+
+ if ((req_start >= field_start && req_start < field_end)
+ || (req_end > field_start && req_end <= field_end)) {
+ tmp_val = 0;
+
+ err = pciback_config_read(dev, field_start,
+ field->size, &tmp_val);
+ if (err)
+ break;
+
+ tmp_val = merge_value(tmp_val, value, get_mask(size),
+ req_start - field_start);
+
+ err = conf_space_write(dev, cfg_entry, field_start,
+ tmp_val);
+
+ /* handled is set true here, but not every byte
+ * may have been written! Properly detecting if
+ * every byte is handled is unnecessary as the
+ * flag is used to detect devices that need
+ * special helpers to work correctly.
+ */
+ handled = 1;
+ }
+ }
+
+ if (!handled && !err) {
+ /* By default, anything not specificially handled above is
+ * read-only. The permissive flag changes this behavior so
+ * that anything not specifically handled above is writable.
+ * This means that some fields may still be read-only because
+ * they have entries in the config_field list that intercept
+ * the write and do nothing. */
+ if (dev_data->permissive || permissive) {
+ switch (size) {
+ case 1:
+ err = pci_write_config_byte(dev, offset,
+ (u8) value);
+ break;
+ case 2:
+ err = pci_write_config_word(dev, offset,
+ (u16) value);
+ break;
+ case 4:
+ err = pci_write_config_dword(dev, offset,
+ (u32) value);
+ break;
+ }
+ } else if (!dev_data->warned_on_write) {
+ dev_data->warned_on_write = 1;
+ dev_warn(&dev->dev, "Driver tried to write to a "
+ "read-only configuration space field at offset "
+ "0x%x, size %d. This may be harmless, but if "
+ "you have problems with your device:\n"
+ "1) see permissive attribute in sysfs\n"
+ "2) report problems to the xen-devel "
+ "mailing list along with details of your "
+ "device obtained from lspci.\n", offset, size);
+ }
+ }
+
+ return pcibios_err_to_errno(err);
+}
+
+void pciback_config_free_dyn_fields(struct pci_dev *dev)
+{
+ struct pciback_dev_data *dev_data = pci_get_drvdata(dev);
+ struct config_field_entry *cfg_entry, *t;
+ const struct config_field *field;
+
+ dev_dbg(&dev->dev,
+ "free-ing dynamically allocated virtual configuration space fields\n");
+ if (!dev_data)
+ return;
+
+ list_for_each_entry_safe(cfg_entry, t, &dev_data->config_fields, list) {
+ field = cfg_entry->field;
+
+ if (field->clean) {
+ field->clean((struct config_field *)field);
+
+ if (cfg_entry->data)
+ kfree(cfg_entry->data);
+
+ list_del(&cfg_entry->list);
+ kfree(cfg_entry);
+ }
+
+ }
+}
+
+void pciback_config_reset_dev(struct pci_dev *dev)
+{
+ struct pciback_dev_data *dev_data = pci_get_drvdata(dev);
+ const struct config_field_entry *cfg_entry;
+ const struct config_field *field;
+
+ dev_dbg(&dev->dev, "resetting virtual configuration space\n");
+ if (!dev_data)
+ return;
+
+ list_for_each_entry(cfg_entry, &dev_data->config_fields, list) {
+ field = cfg_entry->field;
+
+ if (field->reset)
+ field->reset(dev, OFFSET(cfg_entry), cfg_entry->data);
+ }
+}
+
+void pciback_config_free_dev(struct pci_dev *dev)
+{
+ struct pciback_dev_data *dev_data = pci_get_drvdata(dev);
+ struct config_field_entry *cfg_entry, *t;
+ const struct config_field *field;
+
+ dev_dbg(&dev->dev, "free-ing virtual configuration space fields\n");
+ if (!dev_data)
+ return;
+
+ list_for_each_entry_safe(cfg_entry, t, &dev_data->config_fields, list) {
+ list_del(&cfg_entry->list);
+
+ field = cfg_entry->field;
+
+ if (field->release)
+ field->release(dev, OFFSET(cfg_entry), cfg_entry->data);
+
+ kfree(cfg_entry);
+ }
+}
+
+int pciback_config_add_field_offset(struct pci_dev *dev,
+ const struct config_field *field,
+ unsigned int base_offset)
+{
+ int err = 0;
+ struct pciback_dev_data *dev_data = pci_get_drvdata(dev);
+ struct config_field_entry *cfg_entry;
+ void *tmp;
+
+ cfg_entry = kmalloc(sizeof(*cfg_entry), GFP_KERNEL);
+ if (!cfg_entry) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ cfg_entry->data = NULL;
+ cfg_entry->field = field;
+ cfg_entry->base_offset = base_offset;
+
+ /* silently ignore duplicate fields */
+ err = pciback_field_is_dup(dev,OFFSET(cfg_entry));
+ if (err)
+ goto out;
+
+ if (field->init) {
+ tmp = field->init(dev, OFFSET(cfg_entry));
+
+ if (IS_ERR(tmp)) {
+ err = PTR_ERR(tmp);
+ goto out;
+ }
+
+ cfg_entry->data = tmp;
+ }
+
+ dev_dbg(&dev->dev, "added config field at offset 0x%02x\n",
+ OFFSET(cfg_entry));
+ list_add_tail(&cfg_entry->list, &dev_data->config_fields);
+
+ out:
+ if (err)
+ kfree(cfg_entry);
+
+ return err;
+}
+
+/* This sets up the device's virtual configuration space to keep track of
+ * certain registers (like the base address registers (BARs) so that we can
+ * keep the client from manipulating them directly.
+ */
+int pciback_config_init_dev(struct pci_dev *dev)
+{
+ int err = 0;
+ struct pciback_dev_data *dev_data = pci_get_drvdata(dev);
+
+ dev_dbg(&dev->dev, "initializing virtual configuration space\n");
+
+ INIT_LIST_HEAD(&dev_data->config_fields);
+
+ err = pciback_config_header_add_fields(dev);
+ if (err)
+ goto out;
+
+ err = pciback_config_capability_add_fields(dev);
+ if (err)
+ goto out;
+
+ err = pciback_config_quirks_init(dev);
+
+ out:
+ return err;
+}
+
+int pciback_config_init(void)
+{
+ return pciback_config_capability_init();
+}
diff --git a/drivers/xen/pciback/conf_space.h b/drivers/xen/pciback/conf_space.h
new file mode 100644
index 0000000..fe746ef
--- /dev/null
+++ b/drivers/xen/pciback/conf_space.h
@@ -0,0 +1,126 @@
+/*
+ * PCI Backend - Common data structures for overriding the configuration space
+ *
+ * Author: Ryan Wilson <hap9@epoch.ncsc.mil>
+ */
+
+#ifndef __XEN_PCIBACK_CONF_SPACE_H__
+#define __XEN_PCIBACK_CONF_SPACE_H__
+
+#include <linux/list.h>
+#include <linux/err.h>
+
+/* conf_field_init can return an errno in a ptr with ERR_PTR() */
+typedef void *(*conf_field_init) (struct pci_dev * dev, int offset);
+typedef void (*conf_field_reset) (struct pci_dev * dev, int offset, void *data);
+typedef void (*conf_field_free) (struct pci_dev * dev, int offset, void *data);
+
+typedef int (*conf_dword_write) (struct pci_dev * dev, int offset, u32 value,
+ void *data);
+typedef int (*conf_word_write) (struct pci_dev * dev, int offset, u16 value,
+ void *data);
+typedef int (*conf_byte_write) (struct pci_dev * dev, int offset, u8 value,
+ void *data);
+typedef int (*conf_dword_read) (struct pci_dev * dev, int offset, u32 * value,
+ void *data);
+typedef int (*conf_word_read) (struct pci_dev * dev, int offset, u16 * value,
+ void *data);
+typedef int (*conf_byte_read) (struct pci_dev * dev, int offset, u8 * value,
+ void *data);
+
+/* These are the fields within the configuration space which we
+ * are interested in intercepting reads/writes to and changing their
+ * values.
+ */
+struct config_field {
+ unsigned int offset;
+ unsigned int size;
+ unsigned int mask;
+ conf_field_init init;
+ conf_field_reset reset;
+ conf_field_free release;
+ void (*clean) (struct config_field * field);
+ union {
+ struct {
+ conf_dword_write write;
+ conf_dword_read read;
+ } dw;
+ struct {
+ conf_word_write write;
+ conf_word_read read;
+ } w;
+ struct {
+ conf_byte_write write;
+ conf_byte_read read;
+ } b;
+ } u;
+ struct list_head list;
+};
+
+struct config_field_entry {
+ struct list_head list;
+ const struct config_field *field;
+ unsigned int base_offset;
+ void *data;
+};
+
+#define OFFSET(cfg_entry) ((cfg_entry)->base_offset+(cfg_entry)->field->offset)
+
+/* Add fields to a device - the add_fields macro expects to get a pointer to
+ * the first entry in an array (of which the ending is marked by size==0)
+ */
+int pciback_config_add_field_offset(struct pci_dev *dev,
+ const struct config_field *field,
+ unsigned int offset);
+
+static inline int pciback_config_add_field(struct pci_dev *dev,
+ const struct config_field *field)
+{
+ return pciback_config_add_field_offset(dev, field, 0);
+}
+
+static inline int pciback_config_add_fields(struct pci_dev *dev,
+ const struct config_field *field)
+{
+ int i, err = 0;
+ for (i = 0; field[i].size != 0; i++) {
+ err = pciback_config_add_field(dev, &field[i]);
+ if (err)
+ break;
+ }
+ return err;
+}
+
+static inline int pciback_config_add_fields_offset(struct pci_dev *dev,
+ const struct config_field *field,
+ unsigned int offset)
+{
+ int i, err = 0;
+ for (i = 0; field[i].size != 0; i++) {
+ err = pciback_config_add_field_offset(dev, &field[i], offset);
+ if (err)
+ break;
+ }
+ return err;
+}
+
+/* Read/Write the real configuration space */
+int pciback_read_config_byte(struct pci_dev *dev, int offset, u8 * value,
+ void *data);
+int pciback_read_config_word(struct pci_dev *dev, int offset, u16 * value,
+ void *data);
+int pciback_read_config_dword(struct pci_dev *dev, int offset, u32 * value,
+ void *data);
+int pciback_write_config_byte(struct pci_dev *dev, int offset, u8 value,
+ void *data);
+int pciback_write_config_word(struct pci_dev *dev, int offset, u16 value,
+ void *data);
+int pciback_write_config_dword(struct pci_dev *dev, int offset, u32 value,
+ void *data);
+
+int pciback_config_capability_init(void);
+
+int pciback_config_header_add_fields(struct pci_dev *dev);
+int pciback_config_capability_add_fields(struct pci_dev *dev);
+
+#endif /* __XEN_PCIBACK_CONF_SPACE_H__ */
diff --git a/drivers/xen/pciback/conf_space_capability.c b/drivers/xen/pciback/conf_space_capability.c
new file mode 100644
index 0000000..50efca4
--- /dev/null
+++ b/drivers/xen/pciback/conf_space_capability.c
@@ -0,0 +1,69 @@
+/*
+ * PCI Backend - Handles the virtual fields found on the capability lists
+ * in the configuration space.
+ *
+ * Author: Ryan Wilson <hap9@epoch.ncsc.mil>
+ */
+
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include "pciback.h"
+#include "conf_space.h"
+#include "conf_space_capability.h"
+
+static LIST_HEAD(capabilities);
+
+static const struct config_field caplist_header[] = {
+ {
+ .offset = PCI_CAP_LIST_ID,
+ .size = 2, /* encompass PCI_CAP_LIST_ID & PCI_CAP_LIST_NEXT */
+ .u.w.read = pciback_read_config_word,
+ .u.w.write = NULL,
+ },
+ {}
+};
+
+static inline void register_capability(struct pciback_config_capability *cap)
+{
+ list_add_tail(&cap->cap_list, &capabilities);
+}
+
+int pciback_config_capability_add_fields(struct pci_dev *dev)
+{
+ int err = 0;
+ struct pciback_config_capability *cap;
+ int cap_offset;
+
+ list_for_each_entry(cap, &capabilities, cap_list) {
+ cap_offset = pci_find_capability(dev, cap->capability);
+ if (cap_offset) {
+ dev_dbg(&dev->dev, "Found capability 0x%x at 0x%x\n",
+ cap->capability, cap_offset);
+
+ err = pciback_config_add_fields_offset(dev,
+ caplist_header,
+ cap_offset);
+ if (err)
+ goto out;
+ err = pciback_config_add_fields_offset(dev,
+ cap->fields,
+ cap_offset);
+ if (err)
+ goto out;
+ }
+ }
+
+ out:
+ return err;
+}
+
+extern struct pciback_config_capability pciback_config_capability_vpd;
+extern struct pciback_config_capability pciback_config_capability_pm;
+
+int pciback_config_capability_init(void)
+{
+ register_capability(&pciback_config_capability_vpd);
+ register_capability(&pciback_config_capability_pm);
+
+ return 0;
+}
diff --git a/drivers/xen/pciback/conf_space_capability.h b/drivers/xen/pciback/conf_space_capability.h
new file mode 100644
index 0000000..823392e
--- /dev/null
+++ b/drivers/xen/pciback/conf_space_capability.h
@@ -0,0 +1,23 @@
+/*
+ * PCI Backend - Data structures for special overlays for structures on
+ * the capability list.
+ *
+ * Author: Ryan Wilson <hap9@epoch.ncsc.mil>
+ */
+
+#ifndef __PCIBACK_CONFIG_CAPABILITY_H__
+#define __PCIBACK_CONFIG_CAPABILITY_H__
+
+#include <linux/pci.h>
+#include <linux/list.h>
+
+struct pciback_config_capability {
+ struct list_head cap_list;
+
+ int capability;
+
+ /* If the device has the capability found above, add these fields */
+ const struct config_field *fields;
+};
+
+#endif
diff --git a/drivers/xen/pciback/conf_space_capability_msi.c b/drivers/xen/pciback/conf_space_capability_msi.c
new file mode 100644
index 0000000..762e396
--- /dev/null
+++ b/drivers/xen/pciback/conf_space_capability_msi.c
@@ -0,0 +1,79 @@
+/*
+ * PCI Backend -- Configuration overlay for MSI capability
+ */
+#include <linux/pci.h>
+#include <linux/slab.h>
+#include "conf_space.h"
+#include "conf_space_capability.h"
+#include <xen/interface/io/pciif.h>
+#include "pciback.h"
+
+int pciback_enable_msi(struct pciback_device *pdev,
+ struct pci_dev *dev, struct xen_pci_op *op)
+{
+ int otherend = pdev->xdev->otherend_id;
+ int status;
+
+ status = pci_enable_msi(dev);
+
+ if (status) {
+ printk("error enable msi for guest %x status %x\n", otherend, status);
+ op->value = 0;
+ return XEN_PCI_ERR_op_failed;
+ }
+
+ op->value = dev->irq;
+ return 0;
+}
+
+int pciback_disable_msi(struct pciback_device *pdev,
+ struct pci_dev *dev, struct xen_pci_op *op)
+{
+ pci_disable_msi(dev);
+
+ op->value = dev->irq;
+ return 0;
+}
+
+int pciback_enable_msix(struct pciback_device *pdev,
+ struct pci_dev *dev, struct xen_pci_op *op)
+{
+ int i, result;
+ struct msix_entry *entries;
+
+ if (op->value > SH_INFO_MAX_VEC)
+ return -EINVAL;
+
+ entries = kmalloc(op->value * sizeof(*entries), GFP_KERNEL);
+ if (entries == NULL)
+ return -ENOMEM;
+
+ for (i = 0; i < op->value; i++) {
+ entries[i].entry = op->msix_entries[i].entry;
+ entries[i].vector = op->msix_entries[i].vector;
+ }
+
+ result = pci_enable_msix(dev, entries, op->value);
+
+ for (i = 0; i < op->value; i++) {
+ op->msix_entries[i].entry = entries[i].entry;
+ op->msix_entries[i].vector = entries[i].vector;
+ }
+
+ kfree(entries);
+
+ op->value = result;
+
+ return result;
+}
+
+int pciback_disable_msix(struct pciback_device *pdev,
+ struct pci_dev *dev, struct xen_pci_op *op)
+{
+
+ pci_disable_msix(dev);
+
+ op->value = dev->irq;
+ return 0;
+}
+
diff --git a/drivers/xen/pciback/conf_space_capability_pm.c b/drivers/xen/pciback/conf_space_capability_pm.c
new file mode 100644
index 0000000..e2f99c7
--- /dev/null
+++ b/drivers/xen/pciback/conf_space_capability_pm.c
@@ -0,0 +1,126 @@
+/*
+ * PCI Backend - Configuration space overlay for power management
+ *
+ * Author: Ryan Wilson <hap9@epoch.ncsc.mil>
+ */
+
+#include <linux/pci.h>
+#include "conf_space.h"
+#include "conf_space_capability.h"
+
+static int pm_caps_read(struct pci_dev *dev, int offset, u16 *value,
+ void *data)
+{
+ int err;
+ u16 real_value;
+
+ err = pci_read_config_word(dev, offset, &real_value);
+ if (err)
+ goto out;
+
+ *value = real_value & ~PCI_PM_CAP_PME_MASK;
+
+ out:
+ return err;
+}
+
+/* PM_OK_BITS specifies the bits that the driver domain is allowed to change.
+ * Can't allow driver domain to enable PMEs - they're shared */
+#define PM_OK_BITS (PCI_PM_CTRL_PME_STATUS|PCI_PM_CTRL_DATA_SEL_MASK)
+
+static int pm_ctrl_write(struct pci_dev *dev, int offset, u16 new_value,
+ void *data)
+{
+ int err;
+ u16 old_value;
+ pci_power_t new_state, old_state;
+
+ err = pci_read_config_word(dev, offset, &old_value);
+ if (err)
+ goto out;
+
+ old_state = (pci_power_t)(old_value & PCI_PM_CTRL_STATE_MASK);
+ new_state = (pci_power_t)(new_value & PCI_PM_CTRL_STATE_MASK);
+
+ new_value &= PM_OK_BITS;
+ if ((old_value & PM_OK_BITS) != new_value) {
+ new_value = (old_value & ~PM_OK_BITS) | new_value;
+ err = pci_write_config_word(dev, offset, new_value);
+ if (err)
+ goto out;
+ }
+
+ /* Let pci core handle the power management change */
+ dev_dbg(&dev->dev, "set power state to %x\n", new_state);
+ err = pci_set_power_state(dev, new_state);
+ if (err) {
+ err = PCIBIOS_SET_FAILED;
+ goto out;
+ }
+
+ /*
+ * Device may lose PCI config info on D3->D0 transition. This
+ * is a problem for some guests which will not reset BARs. Even
+ * those that have a go will be foiled by our BAR-write handler
+ * which will discard the write! Since Linux won't re-init
+ * the config space automatically in all cases, we do it here.
+ * Future: Should we re-initialise all first 64 bytes of config space?
+ */
+ if (new_state == PCI_D0 &&
+ (old_state == PCI_D3hot || old_state == PCI_D3cold) &&
+ !(old_value & PCI_PM_CTRL_NO_SOFT_RESET))
+ pci_restore_bars(dev);
+
+ out:
+ return err;
+}
+
+/* Ensure PMEs are disabled */
+static void *pm_ctrl_init(struct pci_dev *dev, int offset)
+{
+ int err;
+ u16 value;
+
+ err = pci_read_config_word(dev, offset, &value);
+ if (err)
+ goto out;
+
+ if (value & PCI_PM_CTRL_PME_ENABLE) {
+ value &= ~PCI_PM_CTRL_PME_ENABLE;
+ err = pci_write_config_word(dev, offset, value);
+ }
+
+ out:
+ return ERR_PTR(err);
+}
+
+static const struct config_field caplist_pm[] = {
+ {
+ .offset = PCI_PM_PMC,
+ .size = 2,
+ .u.w.read = pm_caps_read,
+ },
+ {
+ .offset = PCI_PM_CTRL,
+ .size = 2,
+ .init = pm_ctrl_init,
+ .u.w.read = pciback_read_config_word,
+ .u.w.write = pm_ctrl_write,
+ },
+ {
+ .offset = PCI_PM_PPB_EXTENSIONS,
+ .size = 1,
+ .u.b.read = pciback_read_config_byte,
+ },
+ {
+ .offset = PCI_PM_DATA_REGISTER,
+ .size = 1,
+ .u.b.read = pciback_read_config_byte,
+ },
+ {}
+};
+
+struct pciback_config_capability pciback_config_capability_pm = {
+ .capability = PCI_CAP_ID_PM,
+ .fields = caplist_pm,
+};
diff --git a/drivers/xen/pciback/conf_space_capability_vpd.c b/drivers/xen/pciback/conf_space_capability_vpd.c
new file mode 100644
index 0000000..920cb4a
--- /dev/null
+++ b/drivers/xen/pciback/conf_space_capability_vpd.c
@@ -0,0 +1,40 @@
+/*
+ * PCI Backend - Configuration space overlay for Vital Product Data
+ *
+ * Author: Ryan Wilson <hap9@epoch.ncsc.mil>
+ */
+
+#include <linux/pci.h>
+#include "conf_space.h"
+#include "conf_space_capability.h"
+
+static int vpd_address_write(struct pci_dev *dev, int offset, u16 value,
+ void *data)
+{
+ /* Disallow writes to the vital product data */
+ if (value & PCI_VPD_ADDR_F)
+ return PCIBIOS_SET_FAILED;
+ else
+ return pci_write_config_word(dev, offset, value);
+}
+
+static const struct config_field caplist_vpd[] = {
+ {
+ .offset = PCI_VPD_ADDR,
+ .size = 2,
+ .u.w.read = pciback_read_config_word,
+ .u.w.write = vpd_address_write,
+ },
+ {
+ .offset = PCI_VPD_DATA,
+ .size = 4,
+ .u.dw.read = pciback_read_config_dword,
+ .u.dw.write = NULL,
+ },
+ {}
+};
+
+struct pciback_config_capability pciback_config_capability_vpd = {
+ .capability = PCI_CAP_ID_VPD,
+ .fields = caplist_vpd,
+};
diff --git a/drivers/xen/pciback/conf_space_header.c b/drivers/xen/pciback/conf_space_header.c
new file mode 100644
index 0000000..f794e12
--- /dev/null
+++ b/drivers/xen/pciback/conf_space_header.c
@@ -0,0 +1,317 @@
+/*
+ * PCI Backend - Handles the virtual fields in the configuration space headers.
+ *
+ * Author: Ryan Wilson <hap9@epoch.ncsc.mil>
+ */
+
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include "pciback.h"
+#include "conf_space.h"
+
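+/* Cached view of a BAR: "val" holds the real BAR value, "len_val" the
+ * size-probe response, and "which" selects which of the two a read of the
+ * virtual BAR returns (see bar_read() below). */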
+struct pci_bar_info {
+ u32 val;
+ u32 len_val;
+ int which;
+};
+
+#define is_enable_cmd(value) ((value)&(PCI_COMMAND_MEMORY|PCI_COMMAND_IO))
+#define is_master_cmd(value) ((value)&PCI_COMMAND_MASTER)
+
+static int command_write(struct pci_dev *dev, int offset, u16 value, void *data)
+{
+ int err;
+
+ if (!dev->is_enabled && is_enable_cmd(value)) {
+ if (unlikely(verbose_request))
+ printk(KERN_DEBUG "pciback: %s: enable\n",
+ pci_name(dev));
+ err = pci_enable_device(dev);
+ if (err)
+ return err;
+ } else if (dev->is_enabled && !is_enable_cmd(value)) {
+ if (unlikely(verbose_request))
+ printk(KERN_DEBUG "pciback: %s: disable\n",
+ pci_name(dev));
+ pci_disable_device(dev);
+ }
+
+ if (!dev->is_busmaster && is_master_cmd(value)) {
+ if (unlikely(verbose_request))
+ printk(KERN_DEBUG "pciback: %s: set bus master\n",
+ pci_name(dev));
+ pci_set_master(dev);
+ }
+
+ if (value & PCI_COMMAND_INVALIDATE) {
+ if (unlikely(verbose_request))
+ printk(KERN_DEBUG
+ "pciback: %s: enable memory-write-invalidate\n",
+ pci_name(dev));
+ err = pci_set_mwi(dev);
+ if (err) {
+ printk(KERN_WARNING
+ "pciback: %s: cannot enable memory-write-invalidate (%d)\n",
+ pci_name(dev), err);
+ value &= ~PCI_COMMAND_INVALIDATE;
+ }
+ }
+
+ return pci_write_config_word(dev, offset, value);
+}
+
+static int rom_write(struct pci_dev *dev, int offset, u32 value, void *data)
+{
+ struct pci_bar_info *bar = data;
+
+ if (unlikely(!bar)) {
+ printk(KERN_WARNING "pciback: driver data not found for %s\n",
+ pci_name(dev));
+ return XEN_PCI_ERR_op_failed;
+ }
+
+ /* A write to obtain the length must happen as a 32-bit write.
+ * This does not (yet) support writing individual bytes
+ */
+ if (value == ~PCI_ROM_ADDRESS_ENABLE)
+ bar->which = 1;
+ else {
+ u32 tmpval;
+ pci_read_config_dword(dev, offset, &tmpval);
+ if (tmpval != bar->val && value == bar->val) {
+ /* Allow restoration of bar value. */
+ pci_write_config_dword(dev, offset, bar->val);
+ }
+ bar->which = 0;
+ }
+
+ /* Do we need to support enabling/disabling the rom address here? */
+
+ return 0;
+}
+
+/* For the BARs, only allow writes which write ~0 or
+ * the correct resource information
+ * (Needed for when the driver probes the resource usage)
+ */
+static int bar_write(struct pci_dev *dev, int offset, u32 value, void *data)
+{
+ struct pci_bar_info *bar = data;
+
+ if (unlikely(!bar)) {
+ printk(KERN_WARNING "pciback: driver data not found for %s\n",
+ pci_name(dev));
+ return XEN_PCI_ERR_op_failed;
+ }
+
+ /* A write to obtain the length must happen as a 32-bit write.
+ * This does not (yet) support writing individual bytes
+ */
+ if (value == ~0)
+ bar->which = 1;
+ else {
+ u32 tmpval;
+ pci_read_config_dword(dev, offset, &tmpval);
+ if (tmpval != bar->val && value == bar->val) {
+ /* Allow restoration of bar value. */
+ pci_write_config_dword(dev, offset, bar->val);
+ }
+ bar->which = 0;
+ }
+
+ return 0;
+}
+
+static int bar_read(struct pci_dev *dev, int offset, u32 * value, void *data)
+{
+ struct pci_bar_info *bar = data;
+
+ if (unlikely(!bar)) {
+ printk(KERN_WARNING "pciback: driver data not found for %s\n",
+ pci_name(dev));
+ return XEN_PCI_ERR_op_failed;
+ }
+
+ *value = bar->which ? bar->len_val : bar->val;
+
+ return 0;
+}
+
+static inline void read_dev_bar(struct pci_dev *dev,
+ struct pci_bar_info *bar_info, int offset,
+ u32 len_mask)
+{
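+	/* Standard PCI BAR sizing: save the current value, write the all-ones
+	 * mask, read back the size-encoding bits, then restore the original
+	 * value. */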
+ pci_read_config_dword(dev, offset, &bar_info->val);
+ pci_write_config_dword(dev, offset, len_mask);
+ pci_read_config_dword(dev, offset, &bar_info->len_val);
+ pci_write_config_dword(dev, offset, bar_info->val);
+}
+
+static void *bar_init(struct pci_dev *dev, int offset)
+{
+ struct pci_bar_info *bar = kmalloc(sizeof(*bar), GFP_KERNEL);
+
+ if (!bar)
+ return ERR_PTR(-ENOMEM);
+
+ read_dev_bar(dev, bar, offset, ~0);
+ bar->which = 0;
+
+ return bar;
+}
+
+static void *rom_init(struct pci_dev *dev, int offset)
+{
+ struct pci_bar_info *bar = kmalloc(sizeof(*bar), GFP_KERNEL);
+
+ if (!bar)
+ return ERR_PTR(-ENOMEM);
+
+ read_dev_bar(dev, bar, offset, ~PCI_ROM_ADDRESS_ENABLE);
+ bar->which = 0;
+
+ return bar;
+}
+
+static void bar_reset(struct pci_dev *dev, int offset, void *data)
+{
+ struct pci_bar_info *bar = data;
+
+ bar->which = 0;
+}
+
+static void bar_release(struct pci_dev *dev, int offset, void *data)
+{
+ kfree(data);
+}
+
+static int interrupt_read(struct pci_dev *dev, int offset, u8 * value,
+ void *data)
+{
+ *value = (u8) dev->irq;
+
+ return 0;
+}
+
+static int bist_write(struct pci_dev *dev, int offset, u8 value, void *data)
+{
+ u8 cur_value;
+ int err;
+
+ err = pci_read_config_byte(dev, offset, &cur_value);
+ if (err)
+ goto out;
+
+ if ((cur_value & ~PCI_BIST_START) == (value & ~PCI_BIST_START)
+ || value == PCI_BIST_START)
+ err = pci_write_config_byte(dev, offset, value);
+
+ out:
+ return err;
+}
+
+static const struct config_field header_common[] = {
+ {
+ .offset = PCI_COMMAND,
+ .size = 2,
+ .u.w.read = pciback_read_config_word,
+ .u.w.write = command_write,
+ },
+ {
+ .offset = PCI_INTERRUPT_LINE,
+ .size = 1,
+ .u.b.read = interrupt_read,
+ },
+ {
+ .offset = PCI_INTERRUPT_PIN,
+ .size = 1,
+ .u.b.read = pciback_read_config_byte,
+ },
+ {
+ /* Any side effects of letting driver domain control cache line? */
+ .offset = PCI_CACHE_LINE_SIZE,
+ .size = 1,
+ .u.b.read = pciback_read_config_byte,
+ .u.b.write = pciback_write_config_byte,
+ },
+ {
+ .offset = PCI_LATENCY_TIMER,
+ .size = 1,
+ .u.b.read = pciback_read_config_byte,
+ },
+ {
+ .offset = PCI_BIST,
+ .size = 1,
+ .u.b.read = pciback_read_config_byte,
+ .u.b.write = bist_write,
+ },
+ {}
+};
+
+#define CFG_FIELD_BAR(reg_offset) \
+ { \
+ .offset = reg_offset, \
+ .size = 4, \
+ .init = bar_init, \
+ .reset = bar_reset, \
+ .release = bar_release, \
+ .u.dw.read = bar_read, \
+ .u.dw.write = bar_write, \
+ }
+
+#define CFG_FIELD_ROM(reg_offset) \
+ { \
+ .offset = reg_offset, \
+ .size = 4, \
+ .init = rom_init, \
+ .reset = bar_reset, \
+ .release = bar_release, \
+ .u.dw.read = bar_read, \
+ .u.dw.write = rom_write, \
+ }
+
+static const struct config_field header_0[] = {
+ CFG_FIELD_BAR(PCI_BASE_ADDRESS_0),
+ CFG_FIELD_BAR(PCI_BASE_ADDRESS_1),
+ CFG_FIELD_BAR(PCI_BASE_ADDRESS_2),
+ CFG_FIELD_BAR(PCI_BASE_ADDRESS_3),
+ CFG_FIELD_BAR(PCI_BASE_ADDRESS_4),
+ CFG_FIELD_BAR(PCI_BASE_ADDRESS_5),
+ CFG_FIELD_ROM(PCI_ROM_ADDRESS),
+ {}
+};
+
+static const struct config_field header_1[] = {
+ CFG_FIELD_BAR(PCI_BASE_ADDRESS_0),
+ CFG_FIELD_BAR(PCI_BASE_ADDRESS_1),
+ CFG_FIELD_ROM(PCI_ROM_ADDRESS1),
+ {}
+};
+
+int pciback_config_header_add_fields(struct pci_dev *dev)
+{
+ int err;
+
+ err = pciback_config_add_fields(dev, header_common);
+ if (err)
+ goto out;
+
+ switch (dev->hdr_type) {
+ case PCI_HEADER_TYPE_NORMAL:
+ err = pciback_config_add_fields(dev, header_0);
+ break;
+
+ case PCI_HEADER_TYPE_BRIDGE:
+ err = pciback_config_add_fields(dev, header_1);
+ break;
+
+ default:
+ err = -EINVAL;
+ printk(KERN_ERR "pciback: %s: Unsupported header type %d!\n",
+ pci_name(dev), dev->hdr_type);
+ break;
+ }
+
+ out:
+ return err;
+}
diff --git a/drivers/xen/pciback/conf_space_quirks.c b/drivers/xen/pciback/conf_space_quirks.c
new file mode 100644
index 0000000..244a438
--- /dev/null
+++ b/drivers/xen/pciback/conf_space_quirks.c
@@ -0,0 +1,138 @@
+/*
+ * PCI Backend - Handle special overlays for broken devices.
+ *
+ * Author: Ryan Wilson <hap9@epoch.ncsc.mil>
+ * Author: Chris Bookholt <hap10@epoch.ncsc.mil>
+ */
+
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include "pciback.h"
+#include "conf_space.h"
+#include "conf_space_quirks.h"
+
+LIST_HEAD(pciback_quirks);
+
+static inline const struct pci_device_id *
+match_one_device(const struct pci_device_id *id, const struct pci_dev *dev)
+{
+ if ((id->vendor == PCI_ANY_ID || id->vendor == dev->vendor) &&
+ (id->device == PCI_ANY_ID || id->device == dev->device) &&
+ (id->subvendor == PCI_ANY_ID || id->subvendor == dev->subsystem_vendor) &&
+ (id->subdevice == PCI_ANY_ID || id->subdevice == dev->subsystem_device) &&
+ !((id->class ^ dev->class) & id->class_mask))
+ return id;
+ return NULL;
+}
+
+struct pciback_config_quirk *pciback_find_quirk(struct pci_dev *dev)
+{
+ struct pciback_config_quirk *tmp_quirk;
+
+ list_for_each_entry(tmp_quirk, &pciback_quirks, quirks_list)
+ if (match_one_device(&tmp_quirk->devid, dev) != NULL)
+ goto out;
+ tmp_quirk = NULL;
+ printk(KERN_DEBUG
+ "quirk didn't match any device pciback knows about\n");
+ out:
+ return tmp_quirk;
+}
+
+static inline void register_quirk(struct pciback_config_quirk *quirk)
+{
+ list_add_tail(&quirk->quirks_list, &pciback_quirks);
+}
+
+int pciback_field_is_dup(struct pci_dev *dev, unsigned int reg)
+{
+ int ret = 0;
+ struct pciback_dev_data *dev_data = pci_get_drvdata(dev);
+ struct config_field_entry *cfg_entry;
+
+ list_for_each_entry(cfg_entry, &dev_data->config_fields, list) {
+		if (OFFSET(cfg_entry) == reg) {
+ ret = 1;
+ break;
+ }
+ }
+ return ret;
+}
+
+int pciback_config_quirks_add_field(struct pci_dev *dev, struct config_field
+ *field)
+{
+ int err = 0;
+
+ switch (field->size) {
+ case 1:
+ field->u.b.read = pciback_read_config_byte;
+ field->u.b.write = pciback_write_config_byte;
+ break;
+ case 2:
+ field->u.w.read = pciback_read_config_word;
+ field->u.w.write = pciback_write_config_word;
+ break;
+ case 4:
+ field->u.dw.read = pciback_read_config_dword;
+ field->u.dw.write = pciback_write_config_dword;
+ break;
+ default:
+ err = -EINVAL;
+ goto out;
+ }
+
+ pciback_config_add_field(dev, field);
+
+ out:
+ return err;
+}
+
+int pciback_config_quirks_init(struct pci_dev *dev)
+{
+ struct pciback_config_quirk *quirk;
+ int ret = 0;
+
+ quirk = kzalloc(sizeof(*quirk), GFP_ATOMIC);
+ if (!quirk) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ quirk->devid.vendor = dev->vendor;
+ quirk->devid.device = dev->device;
+ quirk->devid.subvendor = dev->subsystem_vendor;
+ quirk->devid.subdevice = dev->subsystem_device;
+ quirk->devid.class = 0;
+ quirk->devid.class_mask = 0;
+ quirk->devid.driver_data = 0UL;
+
+ quirk->pdev = dev;
+
+ register_quirk(quirk);
+ out:
+ return ret;
+}
+
+void pciback_config_field_free(struct config_field *field)
+{
+ kfree(field);
+}
+
+int pciback_config_quirk_release(struct pci_dev *dev)
+{
+ struct pciback_config_quirk *quirk;
+ int ret = 0;
+
+ quirk = pciback_find_quirk(dev);
+ if (!quirk) {
+ ret = -ENXIO;
+ goto out;
+ }
+
+ list_del(&quirk->quirks_list);
+ kfree(quirk);
+
+ out:
+ return ret;
+}
diff --git a/drivers/xen/pciback/conf_space_quirks.h b/drivers/xen/pciback/conf_space_quirks.h
new file mode 100644
index 0000000..acd0e1a
--- /dev/null
+++ b/drivers/xen/pciback/conf_space_quirks.h
@@ -0,0 +1,35 @@
+/*
+ * PCI Backend - Data structures for special overlays for broken devices.
+ *
+ * Ryan Wilson <hap9@epoch.ncsc.mil>
+ * Chris Bookholt <hap10@epoch.ncsc.mil>
+ */
+
+#ifndef __XEN_PCIBACK_CONF_SPACE_QUIRKS_H__
+#define __XEN_PCIBACK_CONF_SPACE_QUIRKS_H__
+
+#include <linux/pci.h>
+#include <linux/list.h>
+
+struct pciback_config_quirk {
+ struct list_head quirks_list;
+ struct pci_device_id devid;
+ struct pci_dev *pdev;
+};
+
+struct pciback_config_quirk *pciback_find_quirk(struct pci_dev *dev);
+
+int pciback_config_quirks_add_field(struct pci_dev *dev, struct config_field
+ *field);
+
+int pciback_config_quirks_remove_field(struct pci_dev *dev, int reg);
+
+int pciback_config_quirks_init(struct pci_dev *dev);
+
+void pciback_config_field_free(struct config_field *field);
+
+int pciback_config_quirk_release(struct pci_dev *dev);
+
+int pciback_field_is_dup(struct pci_dev *dev, unsigned int reg);
+
+#endif
diff --git a/drivers/xen/pciback/controller.c b/drivers/xen/pciback/controller.c
new file mode 100644
index 0000000..294e48f
--- /dev/null
+++ b/drivers/xen/pciback/controller.c
@@ -0,0 +1,443 @@
+/*
+ * Copyright (C) 2007 Hewlett-Packard Development Company, L.P.
+ * Alex Williamson <alex.williamson@hp.com>
+ *
+ * PCI "Controller" Backend - virtualize PCI bus topology based on PCI
+ * controllers. Devices under the same PCI controller are exposed on the
+ * same virtual domain:bus. Within a bus, device slots are virtualized
+ * to compact the bus.
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ */
+
+#include <linux/acpi.h>
+#include <linux/list.h>
+#include <linux/pci.h>
+#include <linux/spinlock.h>
+#include "pciback.h"
+
+#define PCI_MAX_BUSSES 255
+#define PCI_MAX_SLOTS 32
+
+struct controller_dev_entry {
+ struct list_head list;
+ struct pci_dev *dev;
+ unsigned int devfn;
+};
+
+struct controller_list_entry {
+ struct list_head list;
+ struct pci_controller *controller;
+ unsigned int domain;
+ unsigned int bus;
+ unsigned int next_devfn;
+ struct list_head dev_list;
+};
+
+struct controller_dev_data {
+ struct list_head list;
+ unsigned int next_domain;
+ unsigned int next_bus;
+ spinlock_t lock;
+};
+
+struct walk_info {
+ struct pciback_device *pdev;
+ int resource_count;
+ int root_num;
+};
+
+struct pci_dev *pciback_get_pci_dev(struct pciback_device *pdev,
+ unsigned int domain, unsigned int bus,
+ unsigned int devfn)
+{
+ struct controller_dev_data *dev_data = pdev->pci_dev_data;
+ struct controller_dev_entry *dev_entry;
+ struct controller_list_entry *cntrl_entry;
+ struct pci_dev *dev = NULL;
+ unsigned long flags;
+
+ spin_lock_irqsave(&dev_data->lock, flags);
+
+ list_for_each_entry(cntrl_entry, &dev_data->list, list) {
+ if (cntrl_entry->domain != domain ||
+ cntrl_entry->bus != bus)
+ continue;
+
+ list_for_each_entry(dev_entry, &cntrl_entry->dev_list, list) {
+ if (devfn == dev_entry->devfn) {
+ dev = dev_entry->dev;
+ goto found;
+ }
+ }
+ }
+found:
+ spin_unlock_irqrestore(&dev_data->lock, flags);
+
+ return dev;
+}
+
+int pciback_add_pci_dev(struct pciback_device *pdev, struct pci_dev *dev,
+ int devid, publish_pci_dev_cb publish_cb)
+{
+ struct controller_dev_data *dev_data = pdev->pci_dev_data;
+ struct controller_dev_entry *dev_entry;
+ struct controller_list_entry *cntrl_entry;
+ struct pci_controller *dev_controller = PCI_CONTROLLER(dev);
+ unsigned long flags;
+ int ret = 0, found = 0;
+
+ spin_lock_irqsave(&dev_data->lock, flags);
+
+ /* Look to see if we already have a domain:bus for this controller */
+ list_for_each_entry(cntrl_entry, &dev_data->list, list) {
+ if (cntrl_entry->controller == dev_controller) {
+ found = 1;
+ break;
+ }
+ }
+
+ if (!found) {
+ cntrl_entry = kmalloc(sizeof(*cntrl_entry), GFP_ATOMIC);
+ if (!cntrl_entry) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ cntrl_entry->controller = dev_controller;
+ cntrl_entry->next_devfn = PCI_DEVFN(0, 0);
+
+ cntrl_entry->domain = dev_data->next_domain;
+ cntrl_entry->bus = dev_data->next_bus++;
+ if (dev_data->next_bus > PCI_MAX_BUSSES) {
+ dev_data->next_domain++;
+ dev_data->next_bus = 0;
+ }
+
+ INIT_LIST_HEAD(&cntrl_entry->dev_list);
+
+ list_add_tail(&cntrl_entry->list, &dev_data->list);
+ }
+
+ if (PCI_SLOT(cntrl_entry->next_devfn) > PCI_MAX_SLOTS) {
+ /*
+ * While it seems unlikely, this can actually happen if
+ * a controller has P2P bridges under it.
+ */
+ xenbus_dev_fatal(pdev->xdev, -ENOSPC, "Virtual bus %04x:%02x "
+ "is full, no room to export %04x:%02x:%02x.%x",
+ cntrl_entry->domain, cntrl_entry->bus,
+ pci_domain_nr(dev->bus), dev->bus->number,
+ PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn));
+ ret = -ENOSPC;
+ goto out;
+ }
+
+ dev_entry = kmalloc(sizeof(*dev_entry), GFP_ATOMIC);
+ if (!dev_entry) {
+ if (list_empty(&cntrl_entry->dev_list)) {
+ list_del(&cntrl_entry->list);
+ kfree(cntrl_entry);
+ }
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ dev_entry->dev = dev;
+ dev_entry->devfn = cntrl_entry->next_devfn;
+
+ list_add_tail(&dev_entry->list, &cntrl_entry->dev_list);
+
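+	/* Advance to the next virtual slot (always function 0): each exported
+	 * device gets its own slot on the virtual bus. */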
+ cntrl_entry->next_devfn += PCI_DEVFN(1, 0);
+
+out:
+ spin_unlock_irqrestore(&dev_data->lock, flags);
+
+ /* TODO: Publish virtual domain:bus:slot.func here. */
+
+ return ret;
+}
+
+void pciback_release_pci_dev(struct pciback_device *pdev, struct pci_dev *dev)
+{
+ struct controller_dev_data *dev_data = pdev->pci_dev_data;
+ struct controller_list_entry *cntrl_entry;
+ struct controller_dev_entry *dev_entry = NULL;
+ struct pci_dev *found_dev = NULL;
+ unsigned long flags;
+
+ spin_lock_irqsave(&dev_data->lock, flags);
+
+ list_for_each_entry(cntrl_entry, &dev_data->list, list) {
+ if (cntrl_entry->controller != PCI_CONTROLLER(dev))
+ continue;
+
+ list_for_each_entry(dev_entry, &cntrl_entry->dev_list, list) {
+ if (dev_entry->dev == dev) {
+ found_dev = dev_entry->dev;
+ break;
+ }
+ }
+ }
+
+ if (!found_dev) {
+ spin_unlock_irqrestore(&dev_data->lock, flags);
+ return;
+ }
+
+ list_del(&dev_entry->list);
+ kfree(dev_entry);
+
+ if (list_empty(&cntrl_entry->dev_list)) {
+ list_del(&cntrl_entry->list);
+ kfree(cntrl_entry);
+ }
+
+ spin_unlock_irqrestore(&dev_data->lock, flags);
+ pcistub_put_pci_dev(found_dev);
+}
+
+int pciback_init_devices(struct pciback_device *pdev)
+{
+ struct controller_dev_data *dev_data;
+
+ dev_data = kmalloc(sizeof(*dev_data), GFP_KERNEL);
+ if (!dev_data)
+ return -ENOMEM;
+
+ spin_lock_init(&dev_data->lock);
+
+ INIT_LIST_HEAD(&dev_data->list);
+
+ /* Starting domain:bus numbers */
+ dev_data->next_domain = 0;
+ dev_data->next_bus = 0;
+
+ pdev->pci_dev_data = dev_data;
+
+ return 0;
+}
+
+static acpi_status write_xenbus_resource(struct acpi_resource *res, void *data)
+{
+ struct walk_info *info = data;
+ struct acpi_resource_address64 addr;
+ acpi_status status;
+ int i, len, err;
+ char str[32], tmp[3];
+ unsigned char *ptr, *buf;
+
+ status = acpi_resource_to_address64(res, &addr);
+
+ /* Do we care about this range? Let's check. */
+ if (!ACPI_SUCCESS(status) ||
+ !(addr.resource_type == ACPI_MEMORY_RANGE ||
+ addr.resource_type == ACPI_IO_RANGE) ||
+ !addr.address_length || addr.producer_consumer != ACPI_PRODUCER)
+ return AE_OK;
+
+ /*
+ * Furthermore, we really only care to tell the guest about
+ * address ranges that require address translation of some sort.
+ */
+ if (!(addr.resource_type == ACPI_MEMORY_RANGE &&
+ addr.info.mem.translation) &&
+ !(addr.resource_type == ACPI_IO_RANGE &&
+ addr.info.io.translation))
+ return AE_OK;
+
+ /* Store the resource in xenbus for the guest */
+ len = snprintf(str, sizeof(str), "root-%d-resource-%d",
+ info->root_num, info->resource_count);
+ if (unlikely(len >= (sizeof(str) - 1)))
+ return AE_OK;
+
+ buf = kzalloc((sizeof(*res) * 2) + 1, GFP_KERNEL);
+ if (!buf)
+ return AE_OK;
+
+ /* Clean out resource_source */
+ res->data.address64.resource_source.index = 0xFF;
+ res->data.address64.resource_source.string_length = 0;
+ res->data.address64.resource_source.string_ptr = NULL;
+
+ ptr = (unsigned char *)res;
+
+ /* Turn the acpi_resource into an ASCII byte stream */
+ for (i = 0; i < sizeof(*res); i++) {
+ snprintf(tmp, sizeof(tmp), "%02x", ptr[i]);
+ strncat(buf, tmp, 2);
+ }
+
+ err = xenbus_printf(XBT_NIL, info->pdev->xdev->nodename,
+ str, "%s", buf);
+
+ if (!err)
+ info->resource_count++;
+
+ kfree(buf);
+
+ return AE_OK;
+}
+
+int pciback_publish_pci_roots(struct pciback_device *pdev,
+ publish_pci_root_cb publish_root_cb)
+{
+ struct controller_dev_data *dev_data = pdev->pci_dev_data;
+ struct controller_list_entry *cntrl_entry;
+ int i, root_num, len, err = 0;
+ unsigned int domain, bus;
+ char str[64];
+ struct walk_info info;
+
+ spin_lock(&dev_data->lock);
+
+ list_for_each_entry(cntrl_entry, &dev_data->list, list) {
+ /* First publish all the domain:bus info */
+ err = publish_root_cb(pdev, cntrl_entry->domain,
+ cntrl_entry->bus);
+ if (err)
+ goto out;
+
+ /*
+ * Now figure out which root-%d this belongs to
+ * so we can associate resources with it.
+ */
+ err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename,
+ "root_num", "%d", &root_num);
+
+ if (err != 1)
+ goto out;
+
+ for (i = 0; i < root_num; i++) {
+ len = snprintf(str, sizeof(str), "root-%d", i);
+ if (unlikely(len >= (sizeof(str) - 1))) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename,
+ str, "%x:%x", &domain, &bus);
+ if (err != 2)
+ goto out;
+
+ /* Is this the one we just published? */
+ if (domain == cntrl_entry->domain &&
+ bus == cntrl_entry->bus)
+ break;
+ }
+
+ if (i == root_num)
+ goto out;
+
+ info.pdev = pdev;
+ info.resource_count = 0;
+ info.root_num = i;
+
+ /* Let ACPI do the heavy lifting on decoding resources */
+ acpi_walk_resources(cntrl_entry->controller->acpi_handle,
+ METHOD_NAME__CRS, write_xenbus_resource,
+ &info);
+
+		/* No resources. OK. On to the next one. */
+ if (!info.resource_count)
+ continue;
+
+ /* Store the number of resources we wrote for this root-%d */
+ len = snprintf(str, sizeof(str), "root-%d-resources", i);
+ if (unlikely(len >= (sizeof(str) - 1))) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ err = xenbus_printf(XBT_NIL, pdev->xdev->nodename, str,
+ "%d", info.resource_count);
+ if (err)
+ goto out;
+ }
+
+ /* Finally, write some magic to synchronize with the guest. */
+ len = snprintf(str, sizeof(str), "root-resource-magic");
+ if (unlikely(len >= (sizeof(str) - 1))) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ err = xenbus_printf(XBT_NIL, pdev->xdev->nodename, str,
+ "%lx", (sizeof(struct acpi_resource) * 2) + 1);
+
+out:
+ spin_unlock(&dev_data->lock);
+
+ return err;
+}
+
+void pciback_release_devices(struct pciback_device *pdev)
+{
+ struct controller_dev_data *dev_data = pdev->pci_dev_data;
+ struct controller_list_entry *cntrl_entry, *c;
+ struct controller_dev_entry *dev_entry, *d;
+
+ list_for_each_entry_safe(cntrl_entry, c, &dev_data->list, list) {
+ list_for_each_entry_safe(dev_entry, d,
+ &cntrl_entry->dev_list, list) {
+ list_del(&dev_entry->list);
+ pcistub_put_pci_dev(dev_entry->dev);
+ kfree(dev_entry);
+ }
+ list_del(&cntrl_entry->list);
+ kfree(cntrl_entry);
+ }
+
+ kfree(dev_data);
+ pdev->pci_dev_data = NULL;
+}
+
+int pciback_get_pcifront_dev(struct pci_dev *pcidev,
+ struct pciback_device *pdev,
+ unsigned int *domain, unsigned int *bus, unsigned int *devfn)
+{
+ struct controller_dev_data *dev_data = pdev->pci_dev_data;
+ struct controller_dev_entry *dev_entry;
+ struct controller_list_entry *cntrl_entry;
+ unsigned long flags;
+ int found = 0;
+ spin_lock_irqsave(&dev_data->lock, flags);
+
+ list_for_each_entry(cntrl_entry, &dev_data->list, list) {
+ list_for_each_entry(dev_entry, &cntrl_entry->dev_list, list) {
+			if ((dev_entry->dev->bus->number == pcidev->bus->number) &&
+			    (dev_entry->dev->devfn == pcidev->devfn) &&
+			    (pci_domain_nr(dev_entry->dev->bus) ==
+			     pci_domain_nr(pcidev->bus))) {
+ found = 1;
+ *domain = cntrl_entry->domain;
+ *bus = cntrl_entry->bus;
+ *devfn = dev_entry->devfn;
+ goto out;
+ }
+ }
+ }
+out:
+ spin_unlock_irqrestore(&dev_data->lock, flags);
+ return found;
+
+}
+
diff --git a/drivers/xen/pciback/passthrough.c b/drivers/xen/pciback/passthrough.c
new file mode 100644
index 0000000..9e7a0c4
--- /dev/null
+++ b/drivers/xen/pciback/passthrough.c
@@ -0,0 +1,176 @@
+/*
+ * PCI Backend - Provides restricted access to the real PCI bus topology
+ * to the frontend
+ *
+ * Author: Ryan Wilson <hap9@epoch.ncsc.mil>
+ */
+
+#include <linux/list.h>
+#include <linux/pci.h>
+#include <linux/spinlock.h>
+#include "pciback.h"
+
+struct passthrough_dev_data {
+ /* Access to dev_list must be protected by lock */
+ struct list_head dev_list;
+ spinlock_t lock;
+};
+
+struct pci_dev *pciback_get_pci_dev(struct pciback_device *pdev,
+ unsigned int domain, unsigned int bus,
+ unsigned int devfn)
+{
+ struct passthrough_dev_data *dev_data = pdev->pci_dev_data;
+ struct pci_dev_entry *dev_entry;
+ struct pci_dev *dev = NULL;
+ unsigned long flags;
+
+ spin_lock_irqsave(&dev_data->lock, flags);
+
+ list_for_each_entry(dev_entry, &dev_data->dev_list, list) {
+ if (domain == (unsigned int)pci_domain_nr(dev_entry->dev->bus)
+ && bus == (unsigned int)dev_entry->dev->bus->number
+ && devfn == dev_entry->dev->devfn) {
+ dev = dev_entry->dev;
+ break;
+ }
+ }
+
+ spin_unlock_irqrestore(&dev_data->lock, flags);
+
+ return dev;
+}
+
+int pciback_add_pci_dev(struct pciback_device *pdev, struct pci_dev *dev,
+ int devid, publish_pci_dev_cb publish_cb)
+{
+ struct passthrough_dev_data *dev_data = pdev->pci_dev_data;
+ struct pci_dev_entry *dev_entry;
+ unsigned long flags;
+ unsigned int domain, bus, devfn;
+ int err;
+
+ dev_entry = kmalloc(sizeof(*dev_entry), GFP_KERNEL);
+ if (!dev_entry)
+ return -ENOMEM;
+ dev_entry->dev = dev;
+
+ spin_lock_irqsave(&dev_data->lock, flags);
+ list_add_tail(&dev_entry->list, &dev_data->dev_list);
+ spin_unlock_irqrestore(&dev_data->lock, flags);
+
+ /* Publish this device. */
+ domain = (unsigned int)pci_domain_nr(dev->bus);
+ bus = (unsigned int)dev->bus->number;
+ devfn = dev->devfn;
+ err = publish_cb(pdev, domain, bus, devfn, devid);
+
+ return err;
+}
+
+void pciback_release_pci_dev(struct pciback_device *pdev, struct pci_dev *dev)
+{
+ struct passthrough_dev_data *dev_data = pdev->pci_dev_data;
+ struct pci_dev_entry *dev_entry, *t;
+ struct pci_dev *found_dev = NULL;
+ unsigned long flags;
+
+ spin_lock_irqsave(&dev_data->lock, flags);
+
+ list_for_each_entry_safe(dev_entry, t, &dev_data->dev_list, list) {
+ if (dev_entry->dev == dev) {
+ list_del(&dev_entry->list);
+ found_dev = dev_entry->dev;
+ kfree(dev_entry);
+ }
+ }
+
+ spin_unlock_irqrestore(&dev_data->lock, flags);
+
+ if (found_dev)
+ pcistub_put_pci_dev(found_dev);
+}
+
+int pciback_init_devices(struct pciback_device *pdev)
+{
+ struct passthrough_dev_data *dev_data;
+
+ dev_data = kmalloc(sizeof(*dev_data), GFP_KERNEL);
+ if (!dev_data)
+ return -ENOMEM;
+
+ spin_lock_init(&dev_data->lock);
+
+ INIT_LIST_HEAD(&dev_data->dev_list);
+
+ pdev->pci_dev_data = dev_data;
+
+ return 0;
+}
+
+int pciback_publish_pci_roots(struct pciback_device *pdev,
+ publish_pci_root_cb publish_root_cb)
+{
+ int err = 0;
+ struct passthrough_dev_data *dev_data = pdev->pci_dev_data;
+ struct pci_dev_entry *dev_entry, *e;
+ struct pci_dev *dev;
+ int found;
+ unsigned int domain, bus;
+
+ spin_lock(&dev_data->lock);
+
+ list_for_each_entry(dev_entry, &dev_data->dev_list, list) {
+ /* Only publish this device as a root if none of its
+ * parent bridges are exported
+ */
+ found = 0;
+ dev = dev_entry->dev->bus->self;
+ for (; !found && dev != NULL; dev = dev->bus->self) {
+ list_for_each_entry(e, &dev_data->dev_list, list) {
+ if (dev == e->dev) {
+ found = 1;
+ break;
+ }
+ }
+ }
+
+ domain = (unsigned int)pci_domain_nr(dev_entry->dev->bus);
+ bus = (unsigned int)dev_entry->dev->bus->number;
+
+ if (!found) {
+ err = publish_root_cb(pdev, domain, bus);
+ if (err)
+ break;
+ }
+ }
+
+ spin_unlock(&dev_data->lock);
+
+ return err;
+}
+
+void pciback_release_devices(struct pciback_device *pdev)
+{
+ struct passthrough_dev_data *dev_data = pdev->pci_dev_data;
+ struct pci_dev_entry *dev_entry, *t;
+
+ list_for_each_entry_safe(dev_entry, t, &dev_data->dev_list, list) {
+ list_del(&dev_entry->list);
+ pcistub_put_pci_dev(dev_entry->dev);
+ kfree(dev_entry);
+ }
+
+ kfree(dev_data);
+ pdev->pci_dev_data = NULL;
+}
+
+int pciback_get_pcifront_dev(struct pci_dev *pcidev, struct pciback_device *pdev,
+ unsigned int *domain, unsigned int *bus, unsigned int *devfn)
+{
+ *domain = pci_domain_nr(pcidev->bus);
+ *bus = pcidev->bus->number;
+ *devfn = pcidev->devfn;
+ return 1;
+}
diff --git a/drivers/xen/pciback/pci_stub.c b/drivers/xen/pciback/pci_stub.c
new file mode 100644
index 0000000..c481a73
--- /dev/null
+++ b/drivers/xen/pciback/pci_stub.c
@@ -0,0 +1,1316 @@
+/*
+ * PCI Stub Driver - Grabs devices in backend to be exported later
+ *
+ * Ryan Wilson <hap9@epoch.ncsc.mil>
+ * Chris Bookholt <hap10@epoch.ncsc.mil>
+ */
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/rwsem.h>
+#include <linux/list.h>
+#include <linux/spinlock.h>
+#include <linux/kref.h>
+#include <linux/pci.h>
+#include <linux/wait.h>
+#include <asm/atomic.h>
+#include <xen/evtchn.h>
+#include "pciback.h"
+#include "conf_space.h"
+#include "conf_space_quirks.h"
+
+static char *pci_devs_to_hide = NULL;
+wait_queue_head_t aer_wait_queue;
+/* Add a semaphore to synchronize AER handling with pciback remove/reconfigure
+ * operations: we want to avoid a pciback device being removed in the middle
+ * of an AER operation.
+ */
+static DECLARE_RWSEM(pcistub_sem);
+module_param_named(hide, pci_devs_to_hide, charp, 0444);
+
+struct pcistub_device_id {
+ struct list_head slot_list;
+ int domain;
+ unsigned char bus;
+ unsigned int devfn;
+};
+static LIST_HEAD(pcistub_device_ids);
+static DEFINE_SPINLOCK(device_ids_lock);
+
+struct pcistub_device {
+ struct kref kref;
+ struct list_head dev_list;
+ spinlock_t lock;
+
+ struct pci_dev *dev;
+ struct pciback_device *pdev; /* non-NULL if struct pci_dev is in use */
+};
+
+/* Access to pcistub_devices & seized_devices lists and the initialize_devices
+ * flag must be locked with pcistub_devices_lock
+ */
+static DEFINE_SPINLOCK(pcistub_devices_lock);
+static LIST_HEAD(pcistub_devices);
+
+/* wait for device_initcall before initializing our devices
+ * (see pcistub_init_devices_late)
+ */
+static int initialize_devices = 0;
+static LIST_HEAD(seized_devices);
+
+static struct pcistub_device *pcistub_device_alloc(struct pci_dev *dev)
+{
+ struct pcistub_device *psdev;
+
+ dev_dbg(&dev->dev, "pcistub_device_alloc\n");
+
+ psdev = kzalloc(sizeof(*psdev), GFP_ATOMIC);
+ if (!psdev)
+ return NULL;
+
+ psdev->dev = pci_dev_get(dev);
+ if (!psdev->dev) {
+ kfree(psdev);
+ return NULL;
+ }
+
+ kref_init(&psdev->kref);
+ spin_lock_init(&psdev->lock);
+
+ return psdev;
+}
+
+/* Don't call this directly as it's called by pcistub_device_put */
+static void pcistub_device_release(struct kref *kref)
+{
+ struct pcistub_device *psdev;
+
+ psdev = container_of(kref, struct pcistub_device, kref);
+
+ dev_dbg(&psdev->dev->dev, "pcistub_device_release\n");
+
+ /* Clean-up the device */
+ pciback_reset_device(psdev->dev);
+ pciback_config_free_dyn_fields(psdev->dev);
+ pciback_config_free_dev(psdev->dev);
+ kfree(pci_get_drvdata(psdev->dev));
+ pci_set_drvdata(psdev->dev, NULL);
+
+ pci_dev_put(psdev->dev);
+
+ kfree(psdev);
+}
+
+static inline void pcistub_device_get(struct pcistub_device *psdev)
+{
+ kref_get(&psdev->kref);
+}
+
+static inline void pcistub_device_put(struct pcistub_device *psdev)
+{
+ kref_put(&psdev->kref, pcistub_device_release);
+}
+
+static struct pcistub_device *pcistub_device_find(int domain, int bus,
+ int slot, int func)
+{
+ struct pcistub_device *psdev = NULL;
+ unsigned long flags;
+
+ spin_lock_irqsave(&pcistub_devices_lock, flags);
+
+ list_for_each_entry(psdev, &pcistub_devices, dev_list) {
+ if (psdev->dev != NULL
+ && domain == pci_domain_nr(psdev->dev->bus)
+ && bus == psdev->dev->bus->number
+ && PCI_DEVFN(slot, func) == psdev->dev->devfn) {
+ pcistub_device_get(psdev);
+ goto out;
+ }
+ }
+
+ /* didn't find it */
+ psdev = NULL;
+
+ out:
+ spin_unlock_irqrestore(&pcistub_devices_lock, flags);
+ return psdev;
+}
+
+static struct pci_dev *pcistub_device_get_pci_dev(struct pciback_device *pdev,
+ struct pcistub_device *psdev)
+{
+ struct pci_dev *pci_dev = NULL;
+ unsigned long flags;
+
+ pcistub_device_get(psdev);
+
+ spin_lock_irqsave(&psdev->lock, flags);
+ if (!psdev->pdev) {
+ psdev->pdev = pdev;
+ pci_dev = psdev->dev;
+ }
+ spin_unlock_irqrestore(&psdev->lock, flags);
+
+ if (!pci_dev)
+ pcistub_device_put(psdev);
+
+ return pci_dev;
+}
+
+struct pci_dev *pcistub_get_pci_dev_by_slot(struct pciback_device *pdev,
+ int domain, int bus,
+ int slot, int func)
+{
+ struct pcistub_device *psdev;
+ struct pci_dev *found_dev = NULL;
+ unsigned long flags;
+
+ spin_lock_irqsave(&pcistub_devices_lock, flags);
+
+ list_for_each_entry(psdev, &pcistub_devices, dev_list) {
+ if (psdev->dev != NULL
+ && domain == pci_domain_nr(psdev->dev->bus)
+ && bus == psdev->dev->bus->number
+ && PCI_DEVFN(slot, func) == psdev->dev->devfn) {
+ found_dev = pcistub_device_get_pci_dev(pdev, psdev);
+ break;
+ }
+ }
+
+ spin_unlock_irqrestore(&pcistub_devices_lock, flags);
+ return found_dev;
+}
+
+struct pci_dev *pcistub_get_pci_dev(struct pciback_device *pdev,
+ struct pci_dev *dev)
+{
+ struct pcistub_device *psdev;
+ struct pci_dev *found_dev = NULL;
+ unsigned long flags;
+
+ spin_lock_irqsave(&pcistub_devices_lock, flags);
+
+ list_for_each_entry(psdev, &pcistub_devices, dev_list) {
+ if (psdev->dev == dev) {
+ found_dev = pcistub_device_get_pci_dev(pdev, psdev);
+ break;
+ }
+ }
+
+ spin_unlock_irqrestore(&pcistub_devices_lock, flags);
+ return found_dev;
+}
+
+void pcistub_put_pci_dev(struct pci_dev *dev)
+{
+ struct pcistub_device *psdev, *found_psdev = NULL;
+ unsigned long flags;
+
+ spin_lock_irqsave(&pcistub_devices_lock, flags);
+
+ list_for_each_entry(psdev, &pcistub_devices, dev_list) {
+ if (psdev->dev == dev) {
+ found_psdev = psdev;
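+		/* The "hide" module parameter is a list of parenthesised
+		 * slots, e.g. "(0000:01:02.0)(03:04.5)"; the domain part may
+		 * be omitted and then defaults to 0. */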
+ break;
+ }
+ }
+
+ spin_unlock_irqrestore(&pcistub_devices_lock, flags);
+
+	/* Hold this lock to avoid breaking the link between pcistub and
+	 * pciback while AER handling is in progress.
+	 */
+ down_write(&pcistub_sem);
+ /* Cleanup our device
+ * (so it's ready for the next domain)
+ */
+ pciback_reset_device(found_psdev->dev);
+ pciback_config_free_dyn_fields(found_psdev->dev);
+ pciback_config_reset_dev(found_psdev->dev);
+
+ spin_lock_irqsave(&found_psdev->lock, flags);
+ found_psdev->pdev = NULL;
+ spin_unlock_irqrestore(&found_psdev->lock, flags);
+
+ pcistub_device_put(found_psdev);
+ up_write(&pcistub_sem);
+}
+
+static int __devinit pcistub_match_one(struct pci_dev *dev,
+ struct pcistub_device_id *pdev_id)
+{
+ /* Match the specified device by domain, bus, slot, func and also if
+ * any of the device's parent bridges match.
+ */
+ for (; dev != NULL; dev = dev->bus->self) {
+ if (pci_domain_nr(dev->bus) == pdev_id->domain
+ && dev->bus->number == pdev_id->bus
+ && dev->devfn == pdev_id->devfn)
+ return 1;
+
+ /* Sometimes topmost bridge links to itself. */
+ if (dev == dev->bus->self)
+ break;
+ }
+
+ return 0;
+}
+
+static int __devinit pcistub_match(struct pci_dev *dev)
+{
+ struct pcistub_device_id *pdev_id;
+ unsigned long flags;
+ int found = 0;
+
+ spin_lock_irqsave(&device_ids_lock, flags);
+ list_for_each_entry(pdev_id, &pcistub_device_ids, slot_list) {
+ if (pcistub_match_one(dev, pdev_id)) {
+ found = 1;
+ break;
+ }
+ }
+ spin_unlock_irqrestore(&device_ids_lock, flags);
+
+ return found;
+}
+
+static int __devinit pcistub_init_device(struct pci_dev *dev)
+{
+ struct pciback_dev_data *dev_data;
+ int err = 0;
+
+ dev_dbg(&dev->dev, "initializing...\n");
+
+ /* The PCI backend is not intended to be a module (or to work with
+	 * removable PCI devices) yet. If it were, pciback_config_free()
+ * would need to be called somewhere to free the memory allocated
+ * here and then to call kfree(pci_get_drvdata(psdev->dev)).
+ */
+ dev_data = kzalloc(sizeof(*dev_data), GFP_ATOMIC);
+ if (!dev_data) {
+ err = -ENOMEM;
+ goto out;
+ }
+ pci_set_drvdata(dev, dev_data);
+
+ dev_dbg(&dev->dev, "initializing config\n");
+
+ init_waitqueue_head(&aer_wait_queue);
+ err = pciback_config_init_dev(dev);
+ if (err)
+ goto out;
+
+ /* HACK: Force device (& ACPI) to determine what IRQ it's on - we
+ * must do this here because pcibios_enable_device may specify
+ * the pci device's true irq (and possibly its other resources)
+ * if they differ from what's in the configuration space.
+ * This makes the assumption that the device's resources won't
+ * change after this point (otherwise this code may break!)
+ */
+ dev_dbg(&dev->dev, "enabling device\n");
+ err = pci_enable_device(dev);
+ if (err)
+ goto config_release;
+
+ /* Now disable the device (this also ensures some private device
+ * data is setup before we export)
+ */
+ dev_dbg(&dev->dev, "reset device\n");
+ pciback_reset_device(dev);
+
+ return 0;
+
+ config_release:
+ pciback_config_free_dev(dev);
+
+ out:
+ pci_set_drvdata(dev, NULL);
+ kfree(dev_data);
+ return err;
+}
+
+/*
+ * Because some initialization still happens on
+ * devices during fs_initcall, we need to defer
+ * full initialization of our devices until
+ * device_initcall.
+ */
+static int __init pcistub_init_devices_late(void)
+{
+ struct pcistub_device *psdev;
+ unsigned long flags;
+ int err = 0;
+
+ pr_debug("pciback: pcistub_init_devices_late\n");
+
+ spin_lock_irqsave(&pcistub_devices_lock, flags);
+
+ while (!list_empty(&seized_devices)) {
+ psdev = container_of(seized_devices.next,
+ struct pcistub_device, dev_list);
+ list_del(&psdev->dev_list);
+
+ spin_unlock_irqrestore(&pcistub_devices_lock, flags);
+
+ err = pcistub_init_device(psdev->dev);
+ if (err) {
+ dev_err(&psdev->dev->dev,
+ "error %d initializing device\n", err);
+ kfree(psdev);
+ psdev = NULL;
+ }
+
+ spin_lock_irqsave(&pcistub_devices_lock, flags);
+
+ if (psdev)
+ list_add_tail(&psdev->dev_list, &pcistub_devices);
+ }
+
+ initialize_devices = 1;
+
+ spin_unlock_irqrestore(&pcistub_devices_lock, flags);
+
+ return 0;
+}
+
+static int __devinit pcistub_seize(struct pci_dev *dev)
+{
+ struct pcistub_device *psdev;
+ unsigned long flags;
+ int err = 0;
+
+ psdev = pcistub_device_alloc(dev);
+ if (!psdev)
+ return -ENOMEM;
+
+ spin_lock_irqsave(&pcistub_devices_lock, flags);
+
+ if (initialize_devices) {
+ spin_unlock_irqrestore(&pcistub_devices_lock, flags);
+
+ /* don't want irqs disabled when calling pcistub_init_device */
+ err = pcistub_init_device(psdev->dev);
+
+ spin_lock_irqsave(&pcistub_devices_lock, flags);
+
+ if (!err)
+ list_add(&psdev->dev_list, &pcistub_devices);
+ } else {
+ dev_dbg(&dev->dev, "deferring initialization\n");
+ list_add(&psdev->dev_list, &seized_devices);
+ }
+
+ spin_unlock_irqrestore(&pcistub_devices_lock, flags);
+
+ if (err)
+ pcistub_device_put(psdev);
+
+ return err;
+}
+
+static int __devinit pcistub_probe(struct pci_dev *dev,
+ const struct pci_device_id *id)
+{
+ int err = 0;
+
+ dev_dbg(&dev->dev, "probing...\n");
+
+ if (pcistub_match(dev)) {
+
+ if (dev->hdr_type != PCI_HEADER_TYPE_NORMAL
+ && dev->hdr_type != PCI_HEADER_TYPE_BRIDGE) {
+ dev_err(&dev->dev, "can't export pci devices that "
+ "don't have a normal (0) or bridge (1) "
+ "header type!\n");
+ err = -ENODEV;
+ goto out;
+ }
+
+ dev_info(&dev->dev, "seizing device\n");
+ err = pcistub_seize(dev);
+#ifdef CONFIG_PCI_GUESTDEV
+ } else if (dev->hdr_type == PCI_HEADER_TYPE_NORMAL) {
+ if (!pci_is_guestdev(dev)) {
+ err = -ENODEV;
+ goto out;
+ }
+
+ dev_info(&dev->dev, "seizing device\n");
+ err = pcistub_seize(dev);
+#endif /* CONFIG_PCI_GUESTDEV */
+ } else
+ /* Didn't find the device */
+ err = -ENODEV;
+
+ out:
+ return err;
+}
+
+static void pcistub_remove(struct pci_dev *dev)
+{
+ struct pcistub_device *psdev, *found_psdev = NULL;
+ unsigned long flags;
+
+ dev_dbg(&dev->dev, "removing\n");
+
+ spin_lock_irqsave(&pcistub_devices_lock, flags);
+
+ pciback_config_quirk_release(dev);
+
+ list_for_each_entry(psdev, &pcistub_devices, dev_list) {
+ if (psdev->dev == dev) {
+ found_psdev = psdev;
+ break;
+ }
+ }
+
+ spin_unlock_irqrestore(&pcistub_devices_lock, flags);
+
+ if (found_psdev) {
+ dev_dbg(&dev->dev, "found device to remove - in use? %p\n",
+ found_psdev->pdev);
+
+ if (found_psdev->pdev) {
+ printk(KERN_WARNING "pciback: ****** removing device "
+ "%s while still in-use! ******\n",
+ pci_name(found_psdev->dev));
+ printk(KERN_WARNING "pciback: ****** driver domain may "
+ "still access this device's i/o resources!\n");
+ printk(KERN_WARNING "pciback: ****** shutdown driver "
+ "domain before binding device\n");
+ printk(KERN_WARNING "pciback: ****** to other drivers "
+ "or domains\n");
+
+ pciback_release_pci_dev(found_psdev->pdev,
+ found_psdev->dev);
+ }
+
+ spin_lock_irqsave(&pcistub_devices_lock, flags);
+ list_del(&found_psdev->dev_list);
+ spin_unlock_irqrestore(&pcistub_devices_lock, flags);
+
+ /* the final put for releasing from the list */
+ pcistub_device_put(found_psdev);
+ }
+}
+
+static const struct pci_device_id pcistub_ids[] = {
+ {
+ .vendor = PCI_ANY_ID,
+ .device = PCI_ANY_ID,
+ .subvendor = PCI_ANY_ID,
+ .subdevice = PCI_ANY_ID,
+ },
+ {0,},
+};
+
+static void kill_domain_by_device(struct pcistub_device *psdev)
+{
+ struct xenbus_transaction xbt;
+ int err;
+ char nodename[1024];
+
+	if (!psdev) {
+		printk(KERN_ERR "pciback: device is NULL during AER recovery/kill_domain\n");
+		return;
+	}
+ sprintf(nodename, "/local/domain/0/backend/pci/%d/0",
+ psdev->pdev->xdev->otherend_id);
+ nodename[strlen(nodename)] = '\0';
+
+again:
+ err = xenbus_transaction_start(&xbt);
+ if (err)
+ {
+ dev_err(&psdev->dev->dev,
+ "error %d when start xenbus transaction\n", err);
+ return;
+ }
+ /*PV AER handlers will set this flag*/
+	xenbus_printf(xbt, nodename, "aerState", "aerfail");
+ err = xenbus_transaction_end(xbt, 0);
+ if (err)
+ {
+ if (err == -EAGAIN)
+ goto again;
+ dev_err(&psdev->dev->dev,
+ "error %d when end xenbus transaction\n", err);
+ return;
+ }
+}
+
+/* For each AER recovery step (error_detected, mmio_enabled, etc.) the
+ * frontend and backend need to cooperate. In pciback, each of these steps
+ * does a similar job: send a service request and wait for the frontend's
+ * response.
+ */
+static pci_ers_result_t common_process(struct pcistub_device *psdev,
+ pci_channel_state_t state, int aer_cmd, pci_ers_result_t result)
+{
+ pci_ers_result_t res = result;
+ struct xen_pcie_aer_op *aer_op;
+ int ret;
+
+	/* With PV AER drivers. */
+	aer_op = &(psdev->pdev->sh_info->aer_op);
+	aer_op->cmd = aer_cmd;
+	/* Useful for the error_detected callback. */
+	aer_op->err = state;
+	/* The pcifront device's BDF. */
+ ret = pciback_get_pcifront_dev(psdev->dev, psdev->pdev,
+ &aer_op->domain, &aer_op->bus, &aer_op->devfn);
+ if (!ret) {
+ dev_err(&psdev->dev->dev,
+ "pciback: failed to get pcifront device\n");
+ return PCI_ERS_RESULT_NONE;
+ }
+ wmb();
+
+ dev_dbg(&psdev->dev->dev,
+ "pciback: aer_op %x dom %x bus %x devfn %x\n",
+ aer_cmd, aer_op->domain, aer_op->bus, aer_op->devfn);
+	/* Local flag to mark that an AER request is pending; the pciback
+	 * callback uses this flag to decide whether to check for the
+	 * pcifront AER service ack.
+	 */
+ set_bit(_PCIB_op_pending, (unsigned long *)&psdev->pdev->flags);
+
+	/* It is possible that a pcifront conf_read_write request invokes
+	 * the callback, which causes a spurious wake_up. Yet this is
+	 * harmless and better than taking a spinlock here.
+	 */
+ set_bit(_XEN_PCIB_active,
+ (unsigned long *)&psdev->pdev->sh_info->flags);
+ wmb();
+ notify_remote_via_irq(psdev->pdev->evtchn_irq);
+
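+	/* Wait (up to 300 seconds) for pcifront to clear _XEN_PCIB_active,
+	 * signalling that it has handled the AER request. */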
+ ret = wait_event_timeout(aer_wait_queue, !(test_bit(_XEN_PCIB_active,
+ (unsigned long *)&psdev->pdev->sh_info->flags)), 300*HZ);
+
+ if (!ret) {
+ if (test_bit(_XEN_PCIB_active,
+ (unsigned long *)&psdev->pdev->sh_info->flags)) {
+ dev_err(&psdev->dev->dev,
+ "pcifront aer process not responding!\n");
+ clear_bit(_XEN_PCIB_active,
+ (unsigned long *)&psdev->pdev->sh_info->flags);
+ aer_op->err = PCI_ERS_RESULT_NONE;
+ return res;
+ }
+ }
+ clear_bit(_PCIB_op_pending, (unsigned long *)&psdev->pdev->flags);
+
+	if (test_bit(_XEN_PCIF_active,
+	    (unsigned long *)&psdev->pdev->sh_info->flags)) {
+		dev_dbg(&psdev->dev->dev,
+			"schedule pci_conf service in pciback\n");
+ test_and_schedule_op(psdev->pdev);
+ }
+
+ res = (pci_ers_result_t)aer_op->err;
+ return res;
+}
+
+/*
+ * pciback_slot_reset: send the slot_reset request to pcifront if the device
+ * driver provides this service, then wait for the pcifront ack.
+ * @dev: pointer to the PCI device
+ * The return value is used by the aer_core do_recovery policy.
+ */
+static pci_ers_result_t pciback_slot_reset(struct pci_dev *dev)
+{
+ struct pcistub_device *psdev;
+ pci_ers_result_t result;
+
+ result = PCI_ERS_RESULT_RECOVERED;
+ dev_dbg(&dev->dev, "pciback_slot_reset(bus:%x,devfn:%x)\n",
+ dev->bus->number, dev->devfn);
+
+ down_write(&pcistub_sem);
+ psdev = pcistub_device_find(pci_domain_nr(dev->bus),
+ dev->bus->number,
+ PCI_SLOT(dev->devfn),
+ PCI_FUNC(dev->devfn));
+
+ if ( !psdev || !psdev->pdev )
+ {
+ dev_err(&dev->dev,
+ "pciback device is not found/assigned\n");
+ goto end;
+ }
+
+ if ( !psdev->pdev->sh_info )
+ {
+ dev_err(&dev->dev, "pciback device is not connected or owned"
+ " by HVM, kill it\n");
+ kill_domain_by_device(psdev);
+ goto release;
+ }
+
+ if ( !test_bit(_XEN_PCIB_AERHANDLER,
+ (unsigned long *)&psdev->pdev->sh_info->flags) ) {
+ dev_err(&dev->dev,
+ "guest with no AER driver should have been killed\n");
+ goto release;
+ }
+ result = common_process(psdev, 1, XEN_PCI_OP_aer_slotreset, result);
+
+ if (result == PCI_ERS_RESULT_NONE ||
+ result == PCI_ERS_RESULT_DISCONNECT) {
+ dev_dbg(&dev->dev,
+ "No AER slot_reset service or disconnected!\n");
+ kill_domain_by_device(psdev);
+ }
+release:
+ pcistub_device_put(psdev);
+end:
+ up_write(&pcistub_sem);
+ return result;
+
+}
+
+
+/* pciback_mmio_enabled: send the mmio_enabled request to pcifront if the
+ * device driver provides this service, then wait for the pcifront ack.
+ * @dev: pointer to the PCI device
+ * The return value is used by the aer_core do_recovery policy.
+ */
+
+static pci_ers_result_t pciback_mmio_enabled(struct pci_dev *dev)
+{
+ struct pcistub_device *psdev;
+ pci_ers_result_t result;
+
+ result = PCI_ERS_RESULT_RECOVERED;
+ dev_dbg(&dev->dev, "pciback_mmio_enabled(bus:%x,devfn:%x)\n",
+ dev->bus->number, dev->devfn);
+
+ down_write(&pcistub_sem);
+ psdev = pcistub_device_find(pci_domain_nr(dev->bus),
+ dev->bus->number,
+ PCI_SLOT(dev->devfn),
+ PCI_FUNC(dev->devfn));
+
+ if ( !psdev || !psdev->pdev )
+ {
+ dev_err(&dev->dev,
+ "pciback device is not found/assigned\n");
+ goto end;
+ }
+
+ if ( !psdev->pdev->sh_info )
+ {
+ dev_err(&dev->dev, "pciback device is not connected or owned"
+ " by HVM, kill it\n");
+ kill_domain_by_device(psdev);
+ goto release;
+ }
+
+ if ( !test_bit(_XEN_PCIB_AERHANDLER,
+ (unsigned long *)&psdev->pdev->sh_info->flags) ) {
+ dev_err(&dev->dev,
+ "guest with no AER driver should have been killed\n");
+ goto release;
+ }
+ result = common_process(psdev, 1, XEN_PCI_OP_aer_mmio, result);
+
+ if (result == PCI_ERS_RESULT_NONE ||
+ result == PCI_ERS_RESULT_DISCONNECT) {
+ dev_dbg(&dev->dev,
+ "No AER mmio_enabled service or disconnected!\n");
+ kill_domain_by_device(psdev);
+ }
+release:
+ pcistub_device_put(psdev);
+end:
+ up_write(&pcistub_sem);
+ return result;
+}
+
+/* pciback_error_detected: send the error_detected request to pcifront if the
+ * device driver provides this service, then wait for the pcifront ack.
+ * @dev: pointer to the PCI device
+ * @error: the current PCI connection state
+ * The return value is used by the aer_core do_recovery policy.
+ */
+
+static pci_ers_result_t pciback_error_detected(struct pci_dev *dev,
+ pci_channel_state_t error)
+{
+ struct pcistub_device *psdev;
+ pci_ers_result_t result;
+
+ result = PCI_ERS_RESULT_CAN_RECOVER;
+ dev_dbg(&dev->dev, "pciback_error_detected(bus:%x,devfn:%x)\n",
+ dev->bus->number, dev->devfn);
+
+ down_write(&pcistub_sem);
+ psdev = pcistub_device_find(pci_domain_nr(dev->bus),
+ dev->bus->number,
+ PCI_SLOT(dev->devfn),
+ PCI_FUNC(dev->devfn));
+
+ if ( !psdev || !psdev->pdev )
+ {
+ dev_err(&dev->dev,
+ "pciback device is not found/assigned\n");
+ goto end;
+ }
+
+ if ( !psdev->pdev->sh_info )
+ {
+ dev_err(&dev->dev, "pciback device is not connected or owned"
+ " by HVM, kill it\n");
+ kill_domain_by_device(psdev);
+ goto release;
+ }
+
+	/* Guest owns the device yet no AER handler is registered; kill the guest. */
+ if ( !test_bit(_XEN_PCIB_AERHANDLER,
+ (unsigned long *)&psdev->pdev->sh_info->flags) ) {
+ dev_dbg(&dev->dev, "guest may have no aer driver, kill it\n");
+ kill_domain_by_device(psdev);
+ goto release;
+ }
+ result = common_process(psdev, error, XEN_PCI_OP_aer_detected, result);
+
+ if (result == PCI_ERS_RESULT_NONE ||
+ result == PCI_ERS_RESULT_DISCONNECT) {
+ dev_dbg(&dev->dev,
+ "No AER error_detected service or disconnected!\n");
+ kill_domain_by_device(psdev);
+ }
+release:
+ pcistub_device_put(psdev);
+end:
+ up_write(&pcistub_sem);
+ return result;
+}
+
+/* pciback_error_resume: send the error_resume request to pcifront if the
+ * device driver provides this service, then wait for the pcifront ack.
+ * @dev: pointer to the PCI device
+ */
+
+static void pciback_error_resume(struct pci_dev *dev)
+{
+ struct pcistub_device *psdev;
+
+ dev_dbg(&dev->dev, "pciback_error_resume(bus:%x,devfn:%x)\n",
+ dev->bus->number, dev->devfn);
+
+ down_write(&pcistub_sem);
+ psdev = pcistub_device_find(pci_domain_nr(dev->bus),
+ dev->bus->number,
+ PCI_SLOT(dev->devfn),
+ PCI_FUNC(dev->devfn));
+
+ if ( !psdev || !psdev->pdev )
+ {
+ dev_err(&dev->dev,
+ "pciback device is not found/assigned\n");
+ goto end;
+ }
+
+ if ( !psdev->pdev->sh_info )
+ {
+ dev_err(&dev->dev, "pciback device is not connected or owned"
+ " by HVM, kill it\n");
+ kill_domain_by_device(psdev);
+ goto release;
+ }
+
+ if ( !test_bit(_XEN_PCIB_AERHANDLER,
+ (unsigned long *)&psdev->pdev->sh_info->flags) ) {
+ dev_err(&dev->dev,
+ "guest with no AER driver should have been killed\n");
+ kill_domain_by_device(psdev);
+ goto release;
+ }
+ common_process(psdev, 1, XEN_PCI_OP_aer_resume, PCI_ERS_RESULT_RECOVERED);
+release:
+ pcistub_device_put(psdev);
+end:
+ up_write(&pcistub_sem);
+ return;
+}
+
+/*add pciback AER handling*/
+static struct pci_error_handlers pciback_error_handler = {
+ .error_detected = pciback_error_detected,
+ .mmio_enabled = pciback_mmio_enabled,
+ .slot_reset = pciback_slot_reset,
+ .resume = pciback_error_resume,
+};
+
+/*
+ * Note: There is no MODULE_DEVICE_TABLE entry here because this isn't
+ * for a normal device. I don't want it to be loaded automatically.
+ */
+
+static struct pci_driver pciback_pci_driver = {
+ .name = "pciback",
+ .id_table = pcistub_ids,
+ .probe = pcistub_probe,
+ .remove = pcistub_remove,
+ .err_handler = &pciback_error_handler,
+};
+
+static inline int str_to_slot(const char *buf, int *domain, int *bus,
+ int *slot, int *func)
+{
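+	/* Accepts "domain:bus:slot.func" or "bus:slot.func", all fields in
+	 * hex, e.g. "0000:01:02.0" or "01:02.0". */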
+ int err;
+
+ err = sscanf(buf, " %x:%x:%x.%x", domain, bus, slot, func);
+ if (err == 4)
+ return 0;
+ else if (err < 0)
+ return -EINVAL;
+
+ /* try again without domain */
+ *domain = 0;
+ err = sscanf(buf, " %x:%x.%x", bus, slot, func);
+ if (err == 3)
+ return 0;
+
+ return -EINVAL;
+}
+
+static inline int str_to_quirk(const char *buf, int *domain, int *bus, int
+ *slot, int *func, int *reg, int *size, int *mask)
+{
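+	/* Accepts "dddd:bb:ss.f-reg:size:mask" with all fields in hex,
+	 * matching the sscanf format below. */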
+ int err;
+
+ err =
+ sscanf(buf, " %04x:%02x:%02x.%1x-%08x:%1x:%08x", domain, bus, slot,
+ func, reg, size, mask);
+ if (err == 7)
+ return 0;
+ return -EINVAL;
+}
+
+static int pcistub_device_id_add(int domain, int bus, int slot, int func)
+{
+ struct pcistub_device_id *pci_dev_id;
+ unsigned long flags;
+
+ pci_dev_id = kmalloc(sizeof(*pci_dev_id), GFP_KERNEL);
+ if (!pci_dev_id)
+ return -ENOMEM;
+
+ pci_dev_id->domain = domain;
+ pci_dev_id->bus = bus;
+ pci_dev_id->devfn = PCI_DEVFN(slot, func);
+
+ pr_debug("pciback: wants to seize %04x:%02x:%02x.%01x\n",
+ domain, bus, slot, func);
+
+ spin_lock_irqsave(&device_ids_lock, flags);
+ list_add_tail(&pci_dev_id->slot_list, &pcistub_device_ids);
+ spin_unlock_irqrestore(&device_ids_lock, flags);
+
+ return 0;
+}
+
+static int pcistub_device_id_remove(int domain, int bus, int slot, int func)
+{
+ struct pcistub_device_id *pci_dev_id, *t;
+ int devfn = PCI_DEVFN(slot, func);
+ int err = -ENOENT;
+ unsigned long flags;
+
+ spin_lock_irqsave(&device_ids_lock, flags);
+ list_for_each_entry_safe(pci_dev_id, t, &pcistub_device_ids, slot_list) {
+
+ if (pci_dev_id->domain == domain
+ && pci_dev_id->bus == bus && pci_dev_id->devfn == devfn) {
+ /* Don't break; here because it's possible the same
+ * slot could be in the list more than once
+ */
+ list_del(&pci_dev_id->slot_list);
+ kfree(pci_dev_id);
+
+ err = 0;
+
+ pr_debug("pciback: removed %04x:%02x:%02x.%01x from "
+ "seize list\n", domain, bus, slot, func);
+ }
+ }
+ spin_unlock_irqrestore(&device_ids_lock, flags);
+
+ return err;
+}
+
+static int pcistub_reg_add(int domain, int bus, int slot, int func, int reg,
+ int size, int mask)
+{
+ int err = 0;
+ struct pcistub_device *psdev;
+ struct pci_dev *dev;
+ struct config_field *field;
+
+ psdev = pcistub_device_find(domain, bus, slot, func);
+ if (!psdev || !psdev->dev) {
+ err = -ENODEV;
+ goto out;
+ }
+ dev = psdev->dev;
+
+ field = kzalloc(sizeof(*field), GFP_ATOMIC);
+ if (!field) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ field->offset = reg;
+ field->size = size;
+ field->mask = mask;
+ field->init = NULL;
+ field->reset = NULL;
+ field->release = NULL;
+ field->clean = pciback_config_field_free;
+
+ err = pciback_config_quirks_add_field(dev, field);
+ if (err)
+ kfree(field);
+ out:
+ return err;
+}
+
+static ssize_t pcistub_slot_add(struct device_driver *drv, const char *buf,
+ size_t count)
+{
+ int domain, bus, slot, func;
+ int err;
+
+ err = str_to_slot(buf, &domain, &bus, &slot, &func);
+ if (err)
+ goto out;
+
+ err = pcistub_device_id_add(domain, bus, slot, func);
+
+ out:
+ if (!err)
+ err = count;
+ return err;
+}
+
+DRIVER_ATTR(new_slot, S_IWUSR, NULL, pcistub_slot_add);
+
+static ssize_t pcistub_slot_remove(struct device_driver *drv, const char *buf,
+ size_t count)
+{
+ int domain, bus, slot, func;
+ int err;
+
+ err = str_to_slot(buf, &domain, &bus, &slot, &func);
+ if (err)
+ goto out;
+
+ err = pcistub_device_id_remove(domain, bus, slot, func);
+
+ out:
+ if (!err)
+ err = count;
+ return err;
+}
+
+DRIVER_ATTR(remove_slot, S_IWUSR, NULL, pcistub_slot_remove);
+
+static ssize_t pcistub_slot_show(struct device_driver *drv, char *buf)
+{
+ struct pcistub_device_id *pci_dev_id;
+ size_t count = 0;
+ unsigned long flags;
+
+ spin_lock_irqsave(&device_ids_lock, flags);
+ list_for_each_entry(pci_dev_id, &pcistub_device_ids, slot_list) {
+ if (count >= PAGE_SIZE)
+ break;
+
+ count += scnprintf(buf + count, PAGE_SIZE - count,
+ "%04x:%02x:%02x.%01x\n",
+ pci_dev_id->domain, pci_dev_id->bus,
+ PCI_SLOT(pci_dev_id->devfn),
+ PCI_FUNC(pci_dev_id->devfn));
+ }
+ spin_unlock_irqrestore(&device_ids_lock, flags);
+
+ return count;
+}
+
+DRIVER_ATTR(slots, S_IRUSR, pcistub_slot_show, NULL);
+
+static ssize_t pcistub_quirk_add(struct device_driver *drv, const char *buf,
+ size_t count)
+{
+ int domain, bus, slot, func, reg, size, mask;
+ int err;
+
+ err = str_to_quirk(buf, &domain, &bus, &slot, &func, &reg, &size,
+ &mask);
+ if (err)
+ goto out;
+
+ err = pcistub_reg_add(domain, bus, slot, func, reg, size, mask);
+
+ out:
+ if (!err)
+ err = count;
+ return err;
+}
+
+static ssize_t pcistub_quirk_show(struct device_driver *drv, char *buf)
+{
+ int count = 0;
+ unsigned long flags;
+ extern struct list_head pciback_quirks;
+ struct pciback_config_quirk *quirk;
+ struct pciback_dev_data *dev_data;
+ const struct config_field *field;
+ const struct config_field_entry *cfg_entry;
+
+ spin_lock_irqsave(&device_ids_lock, flags);
+ list_for_each_entry(quirk, &pciback_quirks, quirks_list) {
+ if (count >= PAGE_SIZE)
+ goto out;
+
+ count += scnprintf(buf + count, PAGE_SIZE - count,
+ "%02x:%02x.%01x\n\t%04x:%04x:%04x:%04x\n",
+ quirk->pdev->bus->number,
+ PCI_SLOT(quirk->pdev->devfn),
+ PCI_FUNC(quirk->pdev->devfn),
+ quirk->devid.vendor, quirk->devid.device,
+ quirk->devid.subvendor,
+ quirk->devid.subdevice);
+
+ dev_data = pci_get_drvdata(quirk->pdev);
+
+ list_for_each_entry(cfg_entry, &dev_data->config_fields, list) {
+ field = cfg_entry->field;
+ if (count >= PAGE_SIZE)
+ goto out;
+
+ count += scnprintf(buf + count, PAGE_SIZE - count,
+ "\t\t%08x:%01x:%08x\n",
+ cfg_entry->base_offset + field->offset,
+ field->size, field->mask);
+ }
+ }
+
+ out:
+ spin_unlock_irqrestore(&device_ids_lock, flags);
+
+ return count;
+}
+
+DRIVER_ATTR(quirks, S_IRUSR | S_IWUSR, pcistub_quirk_show, pcistub_quirk_add);
+
+static ssize_t permissive_add(struct device_driver *drv, const char *buf,
+ size_t count)
+{
+ int domain, bus, slot, func;
+ int err;
+ struct pcistub_device *psdev;
+ struct pciback_dev_data *dev_data;
+ err = str_to_slot(buf, &domain, &bus, &slot, &func);
+ if (err)
+ goto out;
+ psdev = pcistub_device_find(domain, bus, slot, func);
+ if (!psdev) {
+ err = -ENODEV;
+ goto out;
+ }
+ if (!psdev->dev) {
+ err = -ENODEV;
+ goto release;
+ }
+ dev_data = pci_get_drvdata(psdev->dev);
+ /* the driver data for a device should never be null at this point */
+ if (!dev_data) {
+ err = -ENXIO;
+ goto release;
+ }
+ if (!dev_data->permissive) {
+ dev_data->permissive = 1;
+ /* Let user know that what they're doing could be unsafe */
+ dev_warn(&psdev->dev->dev,
+ "enabling permissive mode configuration space accesses!\n");
+ dev_warn(&psdev->dev->dev,
+ "permissive mode is potentially unsafe!\n");
+ }
+ release:
+ pcistub_device_put(psdev);
+ out:
+ if (!err)
+ err = count;
+ return err;
+}
+
+static ssize_t permissive_show(struct device_driver *drv, char *buf)
+{
+ struct pcistub_device *psdev;
+ struct pciback_dev_data *dev_data;
+ size_t count = 0;
+ unsigned long flags;
+ spin_lock_irqsave(&pcistub_devices_lock, flags);
+ list_for_each_entry(psdev, &pcistub_devices, dev_list) {
+ if (count >= PAGE_SIZE)
+ break;
+ if (!psdev->dev)
+ continue;
+ dev_data = pci_get_drvdata(psdev->dev);
+ if (!dev_data || !dev_data->permissive)
+ continue;
+ count +=
+ scnprintf(buf + count, PAGE_SIZE - count, "%s\n",
+ pci_name(psdev->dev));
+ }
+ spin_unlock_irqrestore(&pcistub_devices_lock, flags);
+ return count;
+}
+
+DRIVER_ATTR(permissive, S_IRUSR | S_IWUSR, permissive_show, permissive_add);
+
+#ifdef CONFIG_PCI_MSI
+
+int pciback_get_owner(struct pci_dev *dev)
+{
+ struct pcistub_device *psdev;
+
+ psdev = pcistub_device_find(pci_domain_nr(dev->bus), dev->bus->number,
+ PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn));
+
+ if (!psdev || !psdev->pdev)
+ return -1;
+
+ return psdev->pdev->xdev->otherend_id;
+}
+#endif
+
+static void pcistub_exit(void)
+{
+ driver_remove_file(&pciback_pci_driver.driver, &driver_attr_new_slot);
+ driver_remove_file(&pciback_pci_driver.driver,
+ &driver_attr_remove_slot);
+ driver_remove_file(&pciback_pci_driver.driver, &driver_attr_slots);
+ driver_remove_file(&pciback_pci_driver.driver, &driver_attr_quirks);
+ driver_remove_file(&pciback_pci_driver.driver, &driver_attr_permissive);
+
+ pci_unregister_driver(&pciback_pci_driver);
+ WARN_ON(unregister_msi_get_owner(pciback_get_owner));
+}
+
+static int __init pcistub_init(void)
+{
+ int pos = 0;
+ int err = 0;
+ int domain, bus, slot, func;
+ int parsed;
+
+ if (pci_devs_to_hide && *pci_devs_to_hide) {
+ do {
+ parsed = 0;
+
+ err = sscanf(pci_devs_to_hide + pos,
+ " (%x:%x:%x.%x) %n",
+ &domain, &bus, &slot, &func, &parsed);
+ if (err != 4) {
+ domain = 0;
+ err = sscanf(pci_devs_to_hide + pos,
+ " (%x:%x.%x) %n",
+ &bus, &slot, &func, &parsed);
+ if (err != 3)
+ goto parse_error;
+ }
+
+ err = pcistub_device_id_add(domain, bus, slot, func);
+ if (err)
+ goto out;
+
+ /* if parsed<=0, we've reached the end of the string */
+ pos += parsed;
+ } while (parsed > 0 && pci_devs_to_hide[pos]);
+ }
+
+ /* If we're the first PCI Device Driver to register, we're the
+ * first one to get offered PCI devices as they become
+ * available (and thus we can be the first to grab them)
+ */
+ err = pci_register_driver(&pciback_pci_driver);
+ if (err < 0)
+ goto out;
+
+ err = driver_create_file(&pciback_pci_driver.driver,
+ &driver_attr_new_slot);
+ if (!err)
+ err = driver_create_file(&pciback_pci_driver.driver,
+ &driver_attr_remove_slot);
+ if (!err)
+ err = driver_create_file(&pciback_pci_driver.driver,
+ &driver_attr_slots);
+ if (!err)
+ err = driver_create_file(&pciback_pci_driver.driver,
+ &driver_attr_quirks);
+ if (!err)
+ err = driver_create_file(&pciback_pci_driver.driver,
+ &driver_attr_permissive);
+
+ if (!err)
+ err = register_msi_get_owner(pciback_get_owner);
+ if (err)
+ pcistub_exit();
+
+ out:
+ return err;
+
+ parse_error:
+ printk(KERN_ERR "pciback: Error parsing pci_devs_to_hide at \"%s\"\n",
+ pci_devs_to_hide + pos);
+ return -EINVAL;
+}
+
+#ifndef MODULE
+/*
+ * fs_initcall happens before device_initcall,
+ * so pciback *should* get called first (because we
+ * want to claim any device before other drivers
+ * get a chance, by being the first PCI device
+ * driver to register)
+ */
+fs_initcall(pcistub_init);
+#endif
+
+static int __init pciback_init(void)
+{
+ int err;
+
+ err = pciback_config_init();
+ if (err)
+ return err;
+
+#ifdef MODULE
+ err = pcistub_init();
+ if (err < 0)
+ return err;
+#endif
+
+ pcistub_init_devices_late();
+ err = pciback_xenbus_register();
+ if (err)
+ pcistub_exit();
+
+ return err;
+}
+
+static void __exit pciback_cleanup(void)
+{
+ pciback_xenbus_unregister();
+ pcistub_exit();
+}
+
+module_init(pciback_init);
+module_exit(pciback_cleanup);
+
+MODULE_LICENSE("Dual BSD/GPL");
diff --git a/drivers/xen/pciback/pciback.h b/drivers/xen/pciback/pciback.h
new file mode 100644
index 0000000..6744f45
--- /dev/null
+++ b/drivers/xen/pciback/pciback.h
@@ -0,0 +1,126 @@
+/*
+ * PCI Backend Common Data Structures & Function Declarations
+ *
+ * Author: Ryan Wilson <hap9@epoch.ncsc.mil>
+ */
+#ifndef __XEN_PCIBACK_H__
+#define __XEN_PCIBACK_H__
+
+#include <linux/pci.h>
+#include <linux/interrupt.h>
+#include <xen/xenbus.h>
+#include <linux/list.h>
+#include <linux/spinlock.h>
+#include <linux/workqueue.h>
+#include <asm/atomic.h>
+#include <xen/interface/io/pciif.h>
+
+struct pci_dev_entry {
+ struct list_head list;
+ struct pci_dev *dev;
+};
+
+#define _PDEVF_op_active (0)
+#define PDEVF_op_active (1<<(_PDEVF_op_active))
+#define _PCIB_op_pending (1)
+#define PCIB_op_pending (1<<(_PCIB_op_pending))
+
+struct pciback_device {
+ void *pci_dev_data;
+ spinlock_t dev_lock;
+
+ struct xenbus_device *xdev;
+
+ struct xenbus_watch be_watch;
+ u8 be_watching;
+
+ int evtchn_irq;
+
+ struct vm_struct *sh_area;
+ struct xen_pci_sharedinfo *sh_info;
+
+ unsigned long flags;
+
+ struct work_struct op_work;
+};
+
+struct pciback_dev_data {
+ struct list_head config_fields;
+ int permissive;
+ int warned_on_write;
+};
+
+/* Get/Put PCI Devices that are hidden from the PCI Backend Domain */
+struct pci_dev *pcistub_get_pci_dev_by_slot(struct pciback_device *pdev,
+ int domain, int bus,
+ int slot, int func);
+struct pci_dev *pcistub_get_pci_dev(struct pciback_device *pdev,
+ struct pci_dev *dev);
+void pcistub_put_pci_dev(struct pci_dev *dev);
+
+/* Ensure a device is turned off or reset */
+void pciback_reset_device(struct pci_dev *pdev);
+
+/* Access a virtual configuration space for a PCI device */
+int pciback_config_init(void);
+int pciback_config_init_dev(struct pci_dev *dev);
+void pciback_config_free_dyn_fields(struct pci_dev *dev);
+void pciback_config_reset_dev(struct pci_dev *dev);
+void pciback_config_free_dev(struct pci_dev *dev);
+int pciback_config_read(struct pci_dev *dev, int offset, int size,
+ u32 * ret_val);
+int pciback_config_write(struct pci_dev *dev, int offset, int size, u32 value);
+
+/* Handle requests for specific devices from the frontend */
+typedef int (*publish_pci_dev_cb) (struct pciback_device *pdev,
+ unsigned int domain, unsigned int bus,
+ unsigned int devfn, unsigned int devid);
+typedef int (*publish_pci_root_cb) (struct pciback_device * pdev,
+ unsigned int domain, unsigned int bus);
+int pciback_add_pci_dev(struct pciback_device *pdev, struct pci_dev *dev,
+ int devid, publish_pci_dev_cb publish_cb);
+void pciback_release_pci_dev(struct pciback_device *pdev, struct pci_dev *dev);
+struct pci_dev *pciback_get_pci_dev(struct pciback_device *pdev,
+ unsigned int domain, unsigned int bus,
+ unsigned int devfn);
+
+/**
+* Added for dom0 PCIe AER handling. Obtain the guest domain/bus/devfn in
+* pciback before sending an AER request to pcifront, so that the guest can
+* identify the device and cooperate with pciback to finish the AER recovery
+* job if the device driver has the capability.
+*/
+
+int pciback_get_pcifront_dev(struct pci_dev *pcidev, struct pciback_device *pdev,
+ unsigned int *domain, unsigned int *bus, unsigned int *devfn);
+int pciback_init_devices(struct pciback_device *pdev);
+int pciback_publish_pci_roots(struct pciback_device *pdev,
+ publish_pci_root_cb cb);
+void pciback_release_devices(struct pciback_device *pdev);
+
+/* Handles events from front-end */
+irqreturn_t pciback_handle_event(int irq, void *dev_id, struct pt_regs *regs);
+void pciback_do_op(void *data);
+
+int pciback_xenbus_register(void);
+void pciback_xenbus_unregister(void);
+
+#ifdef CONFIG_PCI_MSI
+int pciback_enable_msi(struct pciback_device *pdev,
+ struct pci_dev *dev, struct xen_pci_op *op);
+
+int pciback_disable_msi(struct pciback_device *pdev,
+ struct pci_dev *dev, struct xen_pci_op *op);
+
+
+int pciback_enable_msix(struct pciback_device *pdev,
+ struct pci_dev *dev, struct xen_pci_op *op);
+
+int pciback_disable_msix(struct pciback_device *pdev,
+ struct pci_dev *dev, struct xen_pci_op *op);
+#endif
+extern int verbose_request;
+
+void test_and_schedule_op(struct pciback_device *pdev);
+#endif
+
diff --git a/drivers/xen/pciback/pciback_ops.c b/drivers/xen/pciback/pciback_ops.c
new file mode 100644
index 0000000..b85b2db
--- /dev/null
+++ b/drivers/xen/pciback/pciback_ops.c
@@ -0,0 +1,134 @@
+/*
+ * PCI Backend Operations - respond to PCI requests from Frontend
+ *
+ * Author: Ryan Wilson <hap9@epoch.ncsc.mil>
+ */
+#include <linux/module.h>
+#include <linux/wait.h>
+#include <asm/bitops.h>
+#include <xen/evtchn.h>
+#include "pciback.h"
+
+int verbose_request = 0;
+module_param(verbose_request, int, 0644);
+
+/* Ensure a device is "turned off" and ready to be exported.
+ * (Also see pciback_config_reset to ensure virtual configuration space is
+ * ready to be re-exported)
+ */
+void pciback_reset_device(struct pci_dev *dev)
+{
+ u16 cmd;
+
+ /* Disable devices (but not bridges) */
+ if (dev->hdr_type == PCI_HEADER_TYPE_NORMAL) {
+ pci_disable_device(dev);
+
+ pci_write_config_word(dev, PCI_COMMAND, 0);
+
+ dev->is_enabled = 0;
+ dev->is_busmaster = 0;
+ } else {
+ pci_read_config_word(dev, PCI_COMMAND, &cmd);
+ if (cmd & (PCI_COMMAND_INVALIDATE)) {
+ cmd &= ~(PCI_COMMAND_INVALIDATE);
+ pci_write_config_word(dev, PCI_COMMAND, cmd);
+
+ dev->is_busmaster = 0;
+ }
+ }
+}
+extern wait_queue_head_t aer_wait_queue;
+extern struct workqueue_struct *pciback_wq;
+/*
+* Now the same evtchn is used for both the pcifront conf_read_write request
+* and the PCIe AER frontend ack. We use a dedicated workqueue to schedule
+* the pciback conf_read_write service to avoid conflicts with the aer_core
+* do_recovery job, which also uses the system default workqueue.
+*/
+void test_and_schedule_op(struct pciback_device *pdev)
+{
+ /* Check that frontend is requesting an operation and that we are not
+ * already processing a request */
+ if (test_bit(_XEN_PCIF_active, (unsigned long *)&pdev->sh_info->flags)
+ && !test_and_set_bit(_PDEVF_op_active, &pdev->flags))
+ {
+ queue_work(pciback_wq, &pdev->op_work);
+ }
+ /* _XEN_PCIB_active should have been cleared by pcifront. Also make
+ sure pciback is waiting for an ack by checking _PCIB_op_pending. */
+ if (!test_bit(_XEN_PCIB_active,(unsigned long *)&pdev->sh_info->flags)
+ &&test_bit(_PCIB_op_pending, &pdev->flags)) {
+ wake_up(&aer_wait_queue);
+ }
+}
+
+/* Performing the configuration space reads/writes must not be done in atomic
+ * context because some of the pci_* functions can sleep (mostly due to ACPI
+ * use of semaphores). This function is intended to be called from a work
+ * queue in process context taking a struct pciback_device as a parameter */
+void pciback_do_op(void *data)
+{
+ struct pciback_device *pdev = data;
+ struct pci_dev *dev;
+ struct xen_pci_op *op = &pdev->sh_info->op;
+
+ dev = pciback_get_pci_dev(pdev, op->domain, op->bus, op->devfn);
+
+ if (dev == NULL)
+ op->err = XEN_PCI_ERR_dev_not_found;
+ else
+ {
+ switch (op->cmd)
+ {
+ case XEN_PCI_OP_conf_read:
+ op->err = pciback_config_read(dev,
+ op->offset, op->size, &op->value);
+ break;
+ case XEN_PCI_OP_conf_write:
+ op->err = pciback_config_write(dev,
+ op->offset, op->size, op->value);
+ break;
+#ifdef CONFIG_PCI_MSI
+ case XEN_PCI_OP_enable_msi:
+ op->err = pciback_enable_msi(pdev, dev, op);
+ break;
+ case XEN_PCI_OP_disable_msi:
+ op->err = pciback_disable_msi(pdev, dev, op);
+ break;
+ case XEN_PCI_OP_enable_msix:
+ op->err = pciback_enable_msix(pdev, dev, op);
+ break;
+ case XEN_PCI_OP_disable_msix:
+ op->err = pciback_disable_msix(pdev, dev, op);
+ break;
+#endif
+ default:
+ op->err = XEN_PCI_ERR_not_implemented;
+ break;
+ }
+ }
+ /* Tell the driver domain that we're done. */
+ wmb();
+ clear_bit(_XEN_PCIF_active, (unsigned long *)&pdev->sh_info->flags);
+ notify_remote_via_irq(pdev->evtchn_irq);
+
+ /* Mark that we're done. */
+ smp_mb__before_clear_bit(); /* /after/ clearing PCIF_active */
+ clear_bit(_PDEVF_op_active, &pdev->flags);
+ smp_mb__after_clear_bit(); /* /before/ final check for work */
+
+ /* Check to see if the driver domain tried to start another request in
+ * between clearing _XEN_PCIF_active and clearing _PDEVF_op_active.
+ */
+ test_and_schedule_op(pdev);
+}
+
+irqreturn_t pciback_handle_event(int irq, void *dev_id, struct pt_regs *regs)
+{
+ struct pciback_device *pdev = dev_id;
+
+ test_and_schedule_op(pdev);
+
+ return IRQ_HANDLED;
+}
diff --git a/drivers/xen/pciback/slot.c b/drivers/xen/pciback/slot.c
new file mode 100644
index 0000000..105a8b6
--- /dev/null
+++ b/drivers/xen/pciback/slot.c
@@ -0,0 +1,187 @@
+/*
+ * PCI Backend - Provides a Virtual PCI bus (with real devices)
+ * to the frontend
+ *
+ * Author: Ryan Wilson <hap9@epoch.ncsc.mil> (vpci.c)
+ * Author: Tristan Gingold <tristan.gingold@bull.net>, from vpci.c
+ */
+
+#include <linux/list.h>
+#include <linux/slab.h>
+#include <linux/pci.h>
+#include <linux/spinlock.h>
+#include "pciback.h"
+
+/* There are at most 32 slots in a pci bus. */
+#define PCI_SLOT_MAX 32
+
+#define PCI_BUS_NBR 2
+
+struct slot_dev_data {
+ /* Access to slots must be protected by lock */
+ struct pci_dev *slots[PCI_BUS_NBR][PCI_SLOT_MAX];
+ spinlock_t lock;
+};
+
+struct pci_dev *pciback_get_pci_dev(struct pciback_device *pdev,
+ unsigned int domain, unsigned int bus,
+ unsigned int devfn)
+{
+ struct pci_dev *dev = NULL;
+ struct slot_dev_data *slot_dev = pdev->pci_dev_data;
+ unsigned long flags;
+
+ if (domain != 0 || PCI_FUNC(devfn) != 0)
+ return NULL;
+
+ if (PCI_SLOT(devfn) >= PCI_SLOT_MAX || bus >= PCI_BUS_NBR)
+ return NULL;
+
+ spin_lock_irqsave(&slot_dev->lock, flags);
+ dev = slot_dev->slots[bus][PCI_SLOT(devfn)];
+ spin_unlock_irqrestore(&slot_dev->lock, flags);
+
+ return dev;
+}
+
+int pciback_add_pci_dev(struct pciback_device *pdev, struct pci_dev *dev,
+ int devid, publish_pci_dev_cb publish_cb)
+{
+ int err = 0, slot, bus;
+ struct slot_dev_data *slot_dev = pdev->pci_dev_data;
+ unsigned long flags;
+
+ if ((dev->class >> 24) == PCI_BASE_CLASS_BRIDGE) {
+ err = -EFAULT;
+ xenbus_dev_fatal(pdev->xdev, err,
+ "Can't export bridges on the virtual PCI bus");
+ goto out;
+ }
+
+ spin_lock_irqsave(&slot_dev->lock, flags);
+
+ /* Assign to a new slot on the virtual PCI bus */
+ for (bus = 0; bus < PCI_BUS_NBR; bus++)
+ for (slot = 0; slot < PCI_SLOT_MAX; slot++) {
+ if (slot_dev->slots[bus][slot] == NULL) {
+ printk(KERN_INFO
+ "pciback: slot: %s: assign to virtual slot %d, bus %d\n",
+ pci_name(dev), slot, bus);
+ slot_dev->slots[bus][slot] = dev;
+ goto unlock;
+ }
+ }
+
+ err = -ENOMEM;
+ xenbus_dev_fatal(pdev->xdev, err,
+ "No more space on root virtual PCI bus");
+
+ unlock:
+ spin_unlock_irqrestore(&slot_dev->lock, flags);
+
+ /* Publish this device. */
+ if(!err)
+ err = publish_cb(pdev, 0, 0, PCI_DEVFN(slot, 0), devid);
+
+ out:
+ return err;
+}
+
+void pciback_release_pci_dev(struct pciback_device *pdev, struct pci_dev *dev)
+{
+ int slot, bus;
+ struct slot_dev_data *slot_dev = pdev->pci_dev_data;
+ struct pci_dev *found_dev = NULL;
+ unsigned long flags;
+
+ spin_lock_irqsave(&slot_dev->lock, flags);
+
+ for (bus = 0; bus < PCI_BUS_NBR; bus++)
+ for (slot = 0; slot < PCI_SLOT_MAX; slot++) {
+ if (slot_dev->slots[bus][slot] == dev) {
+ slot_dev->slots[bus][slot] = NULL;
+ found_dev = dev;
+ goto out;
+ }
+ }
+
+ out:
+ spin_unlock_irqrestore(&slot_dev->lock, flags);
+
+ if (found_dev)
+ pcistub_put_pci_dev(found_dev);
+}
+
+int pciback_init_devices(struct pciback_device *pdev)
+{
+ int slot, bus;
+ struct slot_dev_data *slot_dev;
+
+ slot_dev = kmalloc(sizeof(*slot_dev), GFP_KERNEL);
+ if (!slot_dev)
+ return -ENOMEM;
+
+ spin_lock_init(&slot_dev->lock);
+
+ for (bus = 0; bus < PCI_BUS_NBR; bus++)
+ for (slot = 0; slot < PCI_SLOT_MAX; slot++)
+ slot_dev->slots[bus][slot] = NULL;
+
+ pdev->pci_dev_data = slot_dev;
+
+ return 0;
+}
+
+int pciback_publish_pci_roots(struct pciback_device *pdev,
+ publish_pci_root_cb publish_cb)
+{
+ /* The Virtual PCI bus has only one root */
+ return publish_cb(pdev, 0, 0);
+}
+
+void pciback_release_devices(struct pciback_device *pdev)
+{
+ int slot, bus;
+ struct slot_dev_data *slot_dev = pdev->pci_dev_data;
+ struct pci_dev *dev;
+
+ for (bus = 0; bus < PCI_BUS_NBR; bus++)
+ for (slot = 0; slot < PCI_SLOT_MAX; slot++) {
+ dev = slot_dev->slots[bus][slot];
+ if (dev != NULL)
+ pcistub_put_pci_dev(dev);
+ }
+
+ kfree(slot_dev);
+ pdev->pci_dev_data = NULL;
+}
+
+int pciback_get_pcifront_dev(struct pci_dev *pcidev, struct pciback_device *pdev,
+ unsigned int *domain, unsigned int *bus, unsigned int *devfn)
+{
+ int slot, busnr;
+ struct slot_dev_data *slot_dev = pdev->pci_dev_data;
+ struct pci_dev *dev;
+ int found = 0;
+ unsigned long flags;
+
+ spin_lock_irqsave(&slot_dev->lock, flags);
+
+ for (busnr = 0; busnr < PCI_BUS_NBR; busnr++)
+ for (slot = 0; slot < PCI_SLOT_MAX; slot++) {
+ dev = slot_dev->slots[busnr][slot];
+ if (dev && dev->bus->number == pcidev->bus->number
+ && dev->devfn == pcidev->devfn
+ && pci_domain_nr(dev->bus) == pci_domain_nr(pcidev->bus)) {
+ found = 1;
+ *domain = 0;
+ *bus = busnr;
+ *devfn = PCI_DEVFN(slot,0);
+ goto out;
+ }
+ }
+out:
+ spin_unlock_irqrestore(&slot_dev->lock, flags);
+ return found;
+
+}
diff --git a/drivers/xen/pciback/vpci.c b/drivers/xen/pciback/vpci.c
new file mode 100644
index 0000000..a5b7ece
--- /dev/null
+++ b/drivers/xen/pciback/vpci.c
@@ -0,0 +1,242 @@
+/*
+ * PCI Backend - Provides a Virtual PCI bus (with real devices)
+ * to the frontend
+ *
+ * Author: Ryan Wilson <hap9@epoch.ncsc.mil>
+ */
+
+#include <linux/list.h>
+#include <linux/slab.h>
+#include <linux/pci.h>
+#include <linux/spinlock.h>
+#include "pciback.h"
+
+#define PCI_SLOT_MAX 32
+
+struct vpci_dev_data {
+ /* Access to dev_list must be protected by lock */
+ struct list_head dev_list[PCI_SLOT_MAX];
+ spinlock_t lock;
+};
+
+static inline struct list_head *list_first(struct list_head *head)
+{
+ return head->next;
+}
+
+struct pci_dev *pciback_get_pci_dev(struct pciback_device *pdev,
+ unsigned int domain, unsigned int bus,
+ unsigned int devfn)
+{
+ struct pci_dev_entry *entry;
+ struct pci_dev *dev = NULL;
+ struct vpci_dev_data *vpci_dev = pdev->pci_dev_data;
+ unsigned long flags;
+
+ if (domain != 0 || bus != 0)
+ return NULL;
+
+ if (PCI_SLOT(devfn) < PCI_SLOT_MAX) {
+ spin_lock_irqsave(&vpci_dev->lock, flags);
+
+ list_for_each_entry(entry,
+ &vpci_dev->dev_list[PCI_SLOT(devfn)],
+ list) {
+ if (PCI_FUNC(entry->dev->devfn) == PCI_FUNC(devfn)) {
+ dev = entry->dev;
+ break;
+ }
+ }
+
+ spin_unlock_irqrestore(&vpci_dev->lock, flags);
+ }
+ return dev;
+}
+
+static inline int match_slot(struct pci_dev *l, struct pci_dev *r)
+{
+ if (pci_domain_nr(l->bus) == pci_domain_nr(r->bus)
+ && l->bus == r->bus && PCI_SLOT(l->devfn) == PCI_SLOT(r->devfn))
+ return 1;
+
+ return 0;
+}
+
+int pciback_add_pci_dev(struct pciback_device *pdev, struct pci_dev *dev,
+ int devid, publish_pci_dev_cb publish_cb)
+{
+ int err = 0, slot, func;
+ struct pci_dev_entry *t, *dev_entry;
+ struct vpci_dev_data *vpci_dev = pdev->pci_dev_data;
+ unsigned long flags;
+
+ if ((dev->class >> 24) == PCI_BASE_CLASS_BRIDGE) {
+ err = -EFAULT;
+ xenbus_dev_fatal(pdev->xdev, err,
+ "Can't export bridges on the virtual PCI bus");
+ goto out;
+ }
+
+ dev_entry = kmalloc(sizeof(*dev_entry), GFP_KERNEL);
+ if (!dev_entry) {
+ err = -ENOMEM;
+ xenbus_dev_fatal(pdev->xdev, err,
+ "Error adding entry to virtual PCI bus");
+ goto out;
+ }
+
+ dev_entry->dev = dev;
+
+ spin_lock_irqsave(&vpci_dev->lock, flags);
+
+ /* Keep multi-function devices together on the virtual PCI bus */
+ for (slot = 0; slot < PCI_SLOT_MAX; slot++) {
+ if (!list_empty(&vpci_dev->dev_list[slot])) {
+ t = list_entry(list_first(&vpci_dev->dev_list[slot]),
+ struct pci_dev_entry, list);
+
+ if (match_slot(dev, t->dev)) {
+ pr_info("pciback: vpci: %s: "
+ "assign to virtual slot %d func %d\n",
+ pci_name(dev), slot,
+ PCI_FUNC(dev->devfn));
+ list_add_tail(&dev_entry->list,
+ &vpci_dev->dev_list[slot]);
+ func = PCI_FUNC(dev->devfn);
+ goto unlock;
+ }
+ }
+ }
+
+ /* Assign to a new slot on the virtual PCI bus */
+ for (slot = 0; slot < PCI_SLOT_MAX; slot++) {
+ if (list_empty(&vpci_dev->dev_list[slot])) {
+ printk(KERN_INFO
+ "pciback: vpci: %s: assign to virtual slot %d\n",
+ pci_name(dev), slot);
+ list_add_tail(&dev_entry->list,
+ &vpci_dev->dev_list[slot]);
+ func = PCI_FUNC(dev->devfn);
+ goto unlock;
+ }
+ }
+
+ err = -ENOMEM;
+ xenbus_dev_fatal(pdev->xdev, err,
+ "No more space on root virtual PCI bus");
+
+ unlock:
+ spin_unlock_irqrestore(&vpci_dev->lock, flags);
+
+ /* Publish this device. */
+ if(!err)
+ err = publish_cb(pdev, 0, 0, PCI_DEVFN(slot, func), devid);
+
+ out:
+ return err;
+}
+
+void pciback_release_pci_dev(struct pciback_device *pdev, struct pci_dev *dev)
+{
+ int slot;
+ struct vpci_dev_data *vpci_dev = pdev->pci_dev_data;
+ struct pci_dev *found_dev = NULL;
+ unsigned long flags;
+
+ spin_lock_irqsave(&vpci_dev->lock, flags);
+
+ for (slot = 0; slot < PCI_SLOT_MAX; slot++) {
+ struct pci_dev_entry *e, *tmp;
+ list_for_each_entry_safe(e, tmp, &vpci_dev->dev_list[slot],
+ list) {
+ if (e->dev == dev) {
+ list_del(&e->list);
+ found_dev = e->dev;
+ kfree(e);
+ goto out;
+ }
+ }
+ }
+
+ out:
+ spin_unlock_irqrestore(&vpci_dev->lock, flags);
+
+ if (found_dev)
+ pcistub_put_pci_dev(found_dev);
+}
+
+int pciback_init_devices(struct pciback_device *pdev)
+{
+ int slot;
+ struct vpci_dev_data *vpci_dev;
+
+ vpci_dev = kmalloc(sizeof(*vpci_dev), GFP_KERNEL);
+ if (!vpci_dev)
+ return -ENOMEM;
+
+ spin_lock_init(&vpci_dev->lock);
+
+ for (slot = 0; slot < PCI_SLOT_MAX; slot++) {
+ INIT_LIST_HEAD(&vpci_dev->dev_list[slot]);
+ }
+
+ pdev->pci_dev_data = vpci_dev;
+
+ return 0;
+}
+
+int pciback_publish_pci_roots(struct pciback_device *pdev,
+ publish_pci_root_cb publish_cb)
+{
+ /* The Virtual PCI bus has only one root */
+ return publish_cb(pdev, 0, 0);
+}
+
+void pciback_release_devices(struct pciback_device *pdev)
+{
+ int slot;
+ struct vpci_dev_data *vpci_dev = pdev->pci_dev_data;
+
+ for (slot = 0; slot < PCI_SLOT_MAX; slot++) {
+ struct pci_dev_entry *e, *tmp;
+ list_for_each_entry_safe(e, tmp, &vpci_dev->dev_list[slot],
+ list) {
+ list_del(&e->list);
+ pcistub_put_pci_dev(e->dev);
+ kfree(e);
+ }
+ }
+
+ kfree(vpci_dev);
+ pdev->pci_dev_data = NULL;
+}
+
+int pciback_get_pcifront_dev(struct pci_dev *pcidev, struct pciback_device *pdev,
+ unsigned int *domain, unsigned int *bus, unsigned int *devfn)
+{
+ struct pci_dev_entry *entry;
+ struct pci_dev *dev = NULL;
+ struct vpci_dev_data *vpci_dev = pdev->pci_dev_data;
+ unsigned long flags;
+ int found = 0, slot;
+
+ spin_lock_irqsave(&vpci_dev->lock, flags);
+ for (slot = 0; slot < PCI_SLOT_MAX; slot++) {
+ list_for_each_entry(entry,
+ &vpci_dev->dev_list[slot],
+ list) {
+ dev = entry->dev;
+ if (dev && dev->bus->number == pcidev->bus->number
+ && pci_domain_nr(dev->bus) == pci_domain_nr(pcidev->bus)
+ && dev->devfn == pcidev->devfn)
+ {
+ found = 1;
+ *domain = 0;
+ *bus = 0;
+ *devfn = PCI_DEVFN(slot, PCI_FUNC(pcidev->devfn));
+ }
+ }
+ }
+ spin_unlock_irqrestore(&vpci_dev->lock, flags);
+ return found;
+}
diff --git a/drivers/xen/pciback/xenbus.c b/drivers/xen/pciback/xenbus.c
new file mode 100644
index 0000000..4d56c45
--- /dev/null
+++ b/drivers/xen/pciback/xenbus.c
@@ -0,0 +1,710 @@
+/*
+ * PCI Backend Xenbus Setup - handles setup with frontend and xend
+ *
+ * Author: Ryan Wilson <hap9@epoch.ncsc.mil>
+ */
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/list.h>
+#include <linux/vmalloc.h>
+#include <xen/xenbus.h>
+#include <xen/evtchn.h>
+#include "pciback.h"
+
+#define INVALID_EVTCHN_IRQ (-1)
+struct workqueue_struct *pciback_wq;
+
+static struct pciback_device *alloc_pdev(struct xenbus_device *xdev)
+{
+ struct pciback_device *pdev;
+
+ pdev = kzalloc(sizeof(struct pciback_device), GFP_KERNEL);
+ if (pdev == NULL)
+ goto out;
+ dev_dbg(&xdev->dev, "allocated pdev @ 0x%p\n", pdev);
+
+ pdev->xdev = xdev;
+ xdev->dev.driver_data = pdev;
+
+ spin_lock_init(&pdev->dev_lock);
+
+ pdev->sh_area = NULL;
+ pdev->sh_info = NULL;
+ pdev->evtchn_irq = INVALID_EVTCHN_IRQ;
+ pdev->be_watching = 0;
+
+ INIT_WORK(&pdev->op_work, pciback_do_op, pdev);
+
+ if (pciback_init_devices(pdev)) {
+ kfree(pdev);
+ pdev = NULL;
+ }
+ out:
+ return pdev;
+}
+
+static void pciback_disconnect(struct pciback_device *pdev)
+{
+ spin_lock(&pdev->dev_lock);
+
+ /* Ensure the guest can't trigger our handler before removing devices */
+ if (pdev->evtchn_irq != INVALID_EVTCHN_IRQ) {
+ unbind_from_irqhandler(pdev->evtchn_irq, pdev);
+ pdev->evtchn_irq = INVALID_EVTCHN_IRQ;
+ }
+
+ /* If the driver domain started an op, make sure we complete it
+ * before releasing the shared memory */
+ flush_workqueue(pciback_wq);
+
+ if (pdev->sh_info != NULL) {
+ xenbus_unmap_ring_vfree(pdev->xdev, pdev->sh_area);
+ pdev->sh_info = NULL;
+ }
+
+ spin_unlock(&pdev->dev_lock);
+}
+
+static void free_pdev(struct pciback_device *pdev)
+{
+ if (pdev->be_watching)
+ unregister_xenbus_watch(&pdev->be_watch);
+
+ pciback_disconnect(pdev);
+
+ pciback_release_devices(pdev);
+
+ pdev->xdev->dev.driver_data = NULL;
+ pdev->xdev = NULL;
+
+ kfree(pdev);
+}
+
+static int pciback_do_attach(struct pciback_device *pdev, int gnt_ref,
+ int remote_evtchn)
+{
+ int err = 0;
+ struct vm_struct *area;
+
+ dev_dbg(&pdev->xdev->dev,
+ "Attaching to frontend resources - gnt_ref=%d evtchn=%d\n",
+ gnt_ref, remote_evtchn);
+
+ area = xenbus_map_ring_valloc(pdev->xdev, gnt_ref);
+ if (IS_ERR(area)) {
+ err = PTR_ERR(area);
+ goto out;
+ }
+ pdev->sh_area = area;
+ pdev->sh_info = area->addr;
+
+ err = bind_interdomain_evtchn_to_irqhandler(
+ pdev->xdev->otherend_id, remote_evtchn, pciback_handle_event,
+ SA_SAMPLE_RANDOM, "pciback", pdev);
+ if (err < 0) {
+ xenbus_dev_fatal(pdev->xdev, err,
+ "Error binding event channel to IRQ");
+ goto out;
+ }
+ pdev->evtchn_irq = err;
+ err = 0;
+
+ dev_dbg(&pdev->xdev->dev, "Attached!\n");
+ out:
+ return err;
+}
+
+static int pciback_attach(struct pciback_device *pdev)
+{
+ int err = 0;
+ int gnt_ref, remote_evtchn;
+ char *magic = NULL;
+
+ spin_lock(&pdev->dev_lock);
+
+ /* Make sure we only do this setup once */
+ if (xenbus_read_driver_state(pdev->xdev->nodename) !=
+ XenbusStateInitialised)
+ goto out;
+
+ /* Wait for frontend to state that it has published the configuration */
+ if (xenbus_read_driver_state(pdev->xdev->otherend) !=
+ XenbusStateInitialised)
+ goto out;
+
+ dev_dbg(&pdev->xdev->dev, "Reading frontend config\n");
+
+ err = xenbus_gather(XBT_NIL, pdev->xdev->otherend,
+ "pci-op-ref", "%u", &gnt_ref,
+ "event-channel", "%u", &remote_evtchn,
+ "magic", NULL, &magic, NULL);
+ if (err) {
+ /* If configuration didn't get read correctly, wait longer */
+ xenbus_dev_fatal(pdev->xdev, err,
+ "Error reading configuration from frontend");
+ goto out;
+ }
+
+ if (magic == NULL || strcmp(magic, XEN_PCI_MAGIC) != 0) {
+ xenbus_dev_fatal(pdev->xdev, -EFAULT,
+ "version mismatch (%s/%s) with pcifront - "
+ "halting pciback",
+ magic, XEN_PCI_MAGIC);
+ goto out;
+ }
+
+ err = pciback_do_attach(pdev, gnt_ref, remote_evtchn);
+ if (err)
+ goto out;
+
+ dev_dbg(&pdev->xdev->dev, "Connecting...\n");
+
+ err = xenbus_switch_state(pdev->xdev, XenbusStateConnected);
+ if (err)
+ xenbus_dev_fatal(pdev->xdev, err,
+ "Error switching to connected state!");
+
+ dev_dbg(&pdev->xdev->dev, "Connected? %d\n", err);
+ out:
+ spin_unlock(&pdev->dev_lock);
+
+ if (magic)
+ kfree(magic);
+
+ return err;
+}
+
+static int pciback_publish_pci_dev(struct pciback_device *pdev,
+ unsigned int domain, unsigned int bus,
+ unsigned int devfn, unsigned int devid)
+{
+ int err;
+ int len;
+ char str[64];
+
+ len = snprintf(str, sizeof(str), "vdev-%d", devid);
+ if (unlikely(len >= (sizeof(str) - 1))) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ err = xenbus_printf(XBT_NIL, pdev->xdev->nodename, str,
+ "%04x:%02x:%02x.%02x", domain, bus,
+ PCI_SLOT(devfn), PCI_FUNC(devfn));
+
+ out:
+ return err;
+}
+
+static int pciback_export_device(struct pciback_device *pdev,
+ int domain, int bus, int slot, int func,
+ int devid)
+{
+ struct pci_dev *dev;
+ int err = 0;
+
+ dev_dbg(&pdev->xdev->dev, "exporting dom %x bus %x slot %x func %x\n",
+ domain, bus, slot, func);
+
+ dev = pcistub_get_pci_dev_by_slot(pdev, domain, bus, slot, func);
+ if (!dev) {
+ err = -EINVAL;
+ xenbus_dev_fatal(pdev->xdev, err,
+ "Couldn't locate PCI device "
+ "(%04x:%02x:%02x.%01x)! "
+ "perhaps already in-use?",
+ domain, bus, slot, func);
+ goto out;
+ }
+
+ err = pciback_add_pci_dev(pdev, dev, devid, pciback_publish_pci_dev);
+ if (err)
+ goto out;
+
+ /* TODO: It'd be nice to export a bridge and have all of its children
+ * get exported with it. This may be best done in xend (which will
+ * have to calculate resource usage anyway) but we probably want to
+ * put something in here to ensure that if a bridge gets given to a
+ * driver domain, all devices under that bridge are not given
+ * to other driver domains (as whoever controls the bridge can disable
+ * it and stop the other devices from working).
+ */
+ out:
+ return err;
+}
+
+static int pciback_remove_device(struct pciback_device *pdev,
+ int domain, int bus, int slot, int func)
+{
+ int err = 0;
+ struct pci_dev *dev;
+
+ dev_dbg(&pdev->xdev->dev, "removing dom %x bus %x slot %x func %x\n",
+ domain, bus, slot, func);
+
+ dev = pciback_get_pci_dev(pdev, domain, bus, PCI_DEVFN(slot, func));
+ if (!dev) {
+ err = -EINVAL;
+ dev_dbg(&pdev->xdev->dev, "Couldn't locate PCI device "
+ "(%04x:%02x:%02x.%01x)! not owned by this domain\n",
+ domain, bus, slot, func);
+ goto out;
+ }
+
+ pciback_release_pci_dev(pdev, dev);
+
+ out:
+ return err;
+}
+
+static int pciback_publish_pci_root(struct pciback_device *pdev,
+ unsigned int domain, unsigned int bus)
+{
+ unsigned int d, b;
+ int i, root_num, len, err;
+ char str[64];
+
+ dev_dbg(&pdev->xdev->dev, "Publishing pci roots\n");
+
+ err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename,
+ "root_num", "%d", &root_num);
+ if (err == 0 || err == -ENOENT)
+ root_num = 0;
+ else if (err < 0)
+ goto out;
+
+ /* Verify that we haven't already published this pci root */
+ for (i = 0; i < root_num; i++) {
+ len = snprintf(str, sizeof(str), "root-%d", i);
+ if (unlikely(len >= (sizeof(str) - 1))) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename,
+ str, "%x:%x", &d, &b);
+ if (err < 0)
+ goto out;
+ if (err != 2) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ if (d == domain && b == bus) {
+ err = 0;
+ goto out;
+ }
+ }
+
+ len = snprintf(str, sizeof(str), "root-%d", root_num);
+ if (unlikely(len >= (sizeof(str) - 1))) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ dev_dbg(&pdev->xdev->dev, "writing root %d at %04x:%02x\n",
+ root_num, domain, bus);
+
+ err = xenbus_printf(XBT_NIL, pdev->xdev->nodename, str,
+ "%04x:%02x", domain, bus);
+ if (err)
+ goto out;
+
+ err = xenbus_printf(XBT_NIL, pdev->xdev->nodename,
+ "root_num", "%d", (root_num + 1));
+
+ out:
+ return err;
+}
+
+static int pciback_reconfigure(struct pciback_device *pdev)
+{
+ int err = 0;
+ int num_devs;
+ int domain, bus, slot, func;
+ int substate;
+ int i, len;
+ char state_str[64];
+ char dev_str[64];
+
+ spin_lock(&pdev->dev_lock);
+
+ dev_dbg(&pdev->xdev->dev, "Reconfiguring device ...\n");
+
+ /* Make sure we only reconfigure once */
+ if (xenbus_read_driver_state(pdev->xdev->nodename) !=
+ XenbusStateReconfiguring)
+ goto out;
+
+ err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename, "num_devs", "%d",
+ &num_devs);
+ if (err != 1) {
+ if (err >= 0)
+ err = -EINVAL;
+ xenbus_dev_fatal(pdev->xdev, err,
+ "Error reading number of devices");
+ goto out;
+ }
+
+ for (i = 0; i < num_devs; i++) {
+ len = snprintf(state_str, sizeof(state_str), "state-%d", i);
+ if (unlikely(len >= (sizeof(state_str) - 1))) {
+ err = -ENOMEM;
+ xenbus_dev_fatal(pdev->xdev, err,
+ "String overflow while reading "
+ "configuration");
+ goto out;
+ }
+ err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename, state_str,
+ "%d", &substate);
+ if (err != 1)
+ substate = XenbusStateUnknown;
+
+ switch (substate) {
+ case XenbusStateInitialising:
+ dev_dbg(&pdev->xdev->dev, "Attaching dev-%d ...\n", i);
+
+ len = snprintf(dev_str, sizeof(dev_str), "dev-%d", i);
+ if (unlikely(len >= (sizeof(dev_str) - 1))) {
+ err = -ENOMEM;
+ xenbus_dev_fatal(pdev->xdev, err,
+ "String overflow while "
+ "reading configuration");
+ goto out;
+ }
+ err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename,
+ dev_str, "%x:%x:%x.%x",
+ &domain, &bus, &slot, &func);
+ if (err < 0) {
+ xenbus_dev_fatal(pdev->xdev, err,
+ "Error reading device "
+ "configuration");
+ goto out;
+ }
+ if (err != 4) {
+ err = -EINVAL;
+ xenbus_dev_fatal(pdev->xdev, err,
+ "Error parsing pci device "
+ "configuration");
+ goto out;
+ }
+
+ err = pciback_export_device(pdev, domain, bus, slot,
+ func, i);
+ if (err)
+ goto out;
+
+ /* Publish pci roots. */
+ err = pciback_publish_pci_roots(pdev, pciback_publish_pci_root);
+ if (err) {
+ xenbus_dev_fatal(pdev->xdev, err,
+ "Error while publish PCI root"
+ "buses for frontend");
+ goto out;
+ }
+
+ err = xenbus_printf(XBT_NIL, pdev->xdev->nodename,
+ state_str, "%d",
+ XenbusStateInitialised);
+ if (err) {
+ xenbus_dev_fatal(pdev->xdev, err,
+ "Error switching substate of "
+ "dev-%d\n", i);
+ goto out;
+ }
+ break;
+
+ case XenbusStateClosing:
+ dev_dbg(&pdev->xdev->dev, "Detaching dev-%d ...\n", i);
+
+ len = snprintf(dev_str, sizeof(dev_str), "vdev-%d", i);
+ if (unlikely(len >= (sizeof(dev_str) - 1))) {
+ err = -ENOMEM;
+ xenbus_dev_fatal(pdev->xdev, err,
+ "String overflow while "
+ "reading configuration");
+ goto out;
+ }
+ err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename,
+ dev_str, "%x:%x:%x.%x",
+ &domain, &bus, &slot, &func);
+ if (err < 0) {
+ xenbus_dev_fatal(pdev->xdev, err,
+ "Error reading device "
+ "configuration");
+ goto out;
+ }
+ if (err != 4) {
+ err = -EINVAL;
+ xenbus_dev_fatal(pdev->xdev, err,
+ "Error parsing pci device "
+ "configuration");
+ goto out;
+ }
+
+ err = pciback_remove_device(pdev, domain, bus, slot,
+ func);
+ if(err)
+ goto out;
+
+ /* TODO: If at some point we implement support for pci
+ * root hot-remove on pcifront side, we'll need to
+ * remove unnecessary xenstore nodes of pci roots here.
+ */
+
+ break;
+
+ default:
+ break;
+ }
+ }
+
+ err = xenbus_switch_state(pdev->xdev, XenbusStateReconfigured);
+ if (err) {
+ xenbus_dev_fatal(pdev->xdev, err,
+ "Error switching to reconfigured state!");
+ goto out;
+ }
+
+ out:
+ spin_unlock(&pdev->dev_lock);
+
+ return 0;
+}
+
+static void pciback_frontend_changed(struct xenbus_device *xdev,
+ enum xenbus_state fe_state)
+{
+ struct pciback_device *pdev = xdev->dev.driver_data;
+
+ dev_dbg(&xdev->dev, "fe state changed %d\n", fe_state);
+
+ switch (fe_state) {
+ case XenbusStateInitialised:
+ pciback_attach(pdev);
+ break;
+
+ case XenbusStateReconfiguring:
+ pciback_reconfigure(pdev);
+ break;
+
+ case XenbusStateConnected:
+ /* pcifront switched its state from reconfiguring to connected.
+ * Follow it into the connected state on our side as well.
+ */
+ xenbus_switch_state(xdev, XenbusStateConnected);
+ break;
+
+ case XenbusStateClosing:
+ pciback_disconnect(pdev);
+ xenbus_switch_state(xdev, XenbusStateClosing);
+ break;
+
+ case XenbusStateClosed:
+ pciback_disconnect(pdev);
+ xenbus_switch_state(xdev, XenbusStateClosed);
+ if (xenbus_dev_is_online(xdev))
+ break;
+ /* fall through if not online */
+ case XenbusStateUnknown:
+ dev_dbg(&xdev->dev, "frontend is gone! unregister device\n");
+ device_unregister(&xdev->dev);
+ break;
+
+ default:
+ break;
+ }
+}
+
+static int pciback_setup_backend(struct pciback_device *pdev)
+{
+ /* Get configuration from xend (if available now) */
+ int domain, bus, slot, func;
+ int err = 0;
+ int i, num_devs;
+ char dev_str[64];
+ char state_str[64];
+
+ spin_lock(&pdev->dev_lock);
+
+ /* It's possible we could get the call to setup twice, so make sure
+ * we're not already connected.
+ */
+ if (xenbus_read_driver_state(pdev->xdev->nodename) !=
+ XenbusStateInitWait)
+ goto out;
+
+ dev_dbg(&pdev->xdev->dev, "getting be setup\n");
+
+ err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename, "num_devs", "%d",
+ &num_devs);
+ if (err != 1) {
+ if (err >= 0)
+ err = -EINVAL;
+ xenbus_dev_fatal(pdev->xdev, err,
+ "Error reading number of devices");
+ goto out;
+ }
+
+ for (i = 0; i < num_devs; i++) {
+ int l = snprintf(dev_str, sizeof(dev_str), "dev-%d", i);
+ if (unlikely(l >= (sizeof(dev_str) - 1))) {
+ err = -ENOMEM;
+ xenbus_dev_fatal(pdev->xdev, err,
+ "String overflow while reading "
+ "configuration");
+ goto out;
+ }
+
+ err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename, dev_str,
+ "%x:%x:%x.%x", &domain, &bus, &slot, &func);
+ if (err < 0) {
+ xenbus_dev_fatal(pdev->xdev, err,
+ "Error reading device configuration");
+ goto out;
+ }
+ if (err != 4) {
+ err = -EINVAL;
+ xenbus_dev_fatal(pdev->xdev, err,
+ "Error parsing pci device "
+ "configuration");
+ goto out;
+ }
+
+ err = pciback_export_device(pdev, domain, bus, slot, func, i);
+ if (err)
+ goto out;
+
+ /* Switch substate of this device. */
+ l = snprintf(state_str, sizeof(state_str), "state-%d", i);
+ if (unlikely(l >= (sizeof(state_str) - 1))) {
+ err = -ENOMEM;
+ xenbus_dev_fatal(pdev->xdev, err,
+ "String overflow while reading "
+ "configuration");
+ goto out;
+ }
+ err = xenbus_printf(XBT_NIL, pdev->xdev->nodename, state_str,
+ "%d", XenbusStateInitialised);
+ if (err) {
+ xenbus_dev_fatal(pdev->xdev, err, "Error switching "
+ "substate of dev-%d\n", i);
+ goto out;
+ }
+ }
+
+ err = pciback_publish_pci_roots(pdev, pciback_publish_pci_root);
+ if (err) {
+ xenbus_dev_fatal(pdev->xdev, err,
+ "Error while publish PCI root buses "
+ "for frontend");
+ goto out;
+ }
+
+ err = xenbus_switch_state(pdev->xdev, XenbusStateInitialised);
+ if (err)
+ xenbus_dev_fatal(pdev->xdev, err,
+ "Error switching to initialised state!");
+
+ out:
+ spin_unlock(&pdev->dev_lock);
+
+ if (!err)
+ /* see if pcifront is already configured (if not, we'll wait) */
+ pciback_attach(pdev);
+
+ return err;
+}
+
+static void pciback_be_watch(struct xenbus_watch *watch,
+ const char **vec, unsigned int len)
+{
+ struct pciback_device *pdev =
+ container_of(watch, struct pciback_device, be_watch);
+
+ switch (xenbus_read_driver_state(pdev->xdev->nodename)) {
+ case XenbusStateInitWait:
+ pciback_setup_backend(pdev);
+ break;
+
+ default:
+ break;
+ }
+}
+
+static int pciback_xenbus_probe(struct xenbus_device *dev,
+ const struct xenbus_device_id *id)
+{
+ int err = 0;
+ struct pciback_device *pdev = alloc_pdev(dev);
+
+ if (pdev == NULL) {
+ err = -ENOMEM;
+ xenbus_dev_fatal(dev, err,
+ "Error allocating pciback_device struct");
+ goto out;
+ }
+
+ /* wait for xend to configure us */
+ err = xenbus_switch_state(dev, XenbusStateInitWait);
+ if (err)
+ goto out;
+
+ /* watch the backend node for backend configuration information */
+ err = xenbus_watch_path(dev, dev->nodename, &pdev->be_watch,
+ pciback_be_watch);
+ if (err)
+ goto out;
+ pdev->be_watching = 1;
+
+ /* We need to force a call to our callback here in case
+ * xend already configured us!
+ */
+ pciback_be_watch(&pdev->be_watch, NULL, 0);
+
+ out:
+ return err;
+}
+
+static int pciback_xenbus_remove(struct xenbus_device *dev)
+{
+ struct pciback_device *pdev = dev->dev.driver_data;
+
+ if (pdev != NULL)
+ free_pdev(pdev);
+
+ return 0;
+}
+
+static const struct xenbus_device_id xenpci_ids[] = {
+ {"pci"},
+ {{0}},
+};
+
+static struct xenbus_driver xenbus_pciback_driver = {
+ .name = "pciback",
+ .owner = THIS_MODULE,
+ .ids = xenpci_ids,
+ .probe = pciback_xenbus_probe,
+ .remove = pciback_xenbus_remove,
+ .otherend_changed = pciback_frontend_changed,
+};
+
+int __init pciback_xenbus_register(void)
+{
+ if (!is_running_on_xen())
+ return -ENODEV;
+ pciback_wq = create_workqueue("pciback_workqueue");
+ if (!pciback_wq) {
+ printk(KERN_ERR "pciback_xenbus_register: create"
+ "pciback_workqueue failed\n");
+ return -EFAULT;
+ }
+ return xenbus_register_backend(&xenbus_pciback_driver);
+}
+
+void __exit pciback_xenbus_unregister(void)
+{
+ destroy_workqueue(pciback_wq);
+ xenbus_unregister_driver(&xenbus_pciback_driver);
+}
--
1.7.4
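
The completion path in pciback_do_op() and test_and_schedule_op() above is a two-flag handshake: _XEN_PCIF_active in the shared page is owned by pcifront and signals a pending request, while _PDEVF_op_active in pdev->flags marks that pciback is already servicing one; the memory barriers order the clearing of one flag against the re-check of the other so a request posted in the race window is not lost. A minimal user-space sketch of that protocol, using C11 atomics in place of the kernel's bitops and barriers (the names mirror the driver but everything else is illustrative), looks like this:

/* Minimal user-space sketch of the pciback two-flag handshake.
 * Illustrative only: names mirror the driver, but this is not kernel code. */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static atomic_bool pcif_active;	/* set by the frontend when it posts a request */
static atomic_bool op_active;	/* set by the backend while it services one    */

/* Backend side: roughly test_and_schedule_op() */
static bool schedule_op(void)
{
	/* Only start work if a request is pending and none is in flight. */
	if (atomic_load(&pcif_active) &&
	    !atomic_exchange(&op_active, true))
		return true;		/* queue_work() in the real driver */
	return false;
}

/* Backend side: roughly the tail of pciback_do_op() */
static void complete_op(void)
{
	/* Tell the frontend we are done before dropping our busy flag... */
	atomic_store(&pcif_active, false);
	/* ...then drop the busy flag and re-check for a request that may
	 * have arrived in between (the race pciback_do_op() guards against). */
	atomic_store(&op_active, false);
	if (schedule_op())
		printf("re-queued work for a racing request\n");
}

int main(void)
{
	atomic_store(&pcif_active, true);	/* frontend posts a request */
	if (schedule_op())
		complete_op();
	return 0;
}

This only models the ordering argument; the real driver additionally defers the work to pciback_wq because the config-space accessors may sleep.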
From cf2a64556286b762ce6a3a9b408ba7ecdcaea03a Mon Sep 17 00:00:00 2001
From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Date: Tue, 13 Oct 2009 17:22:22 -0400
Subject: [PATCH 013/197] xen-pciback: Fix include header name change (evtchn.h is now events.h)
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
drivers/xen/pciback/pci_stub.c | 2 +-
drivers/xen/pciback/pciback_ops.c | 2 +-
drivers/xen/pciback/xenbus.c | 2 +-
3 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/drivers/xen/pciback/pci_stub.c b/drivers/xen/pciback/pci_stub.c
index c481a73..c02f21f 100644
--- a/drivers/xen/pciback/pci_stub.c
+++ b/drivers/xen/pciback/pci_stub.c
@@ -13,7 +13,7 @@
#include <linux/pci.h>
#include <linux/wait.h>
#include <asm/atomic.h>
-#include <xen/evtchn.h>
+#include <xen/events.h>
#include "pciback.h"
#include "conf_space.h"
#include "conf_space_quirks.h"
diff --git a/drivers/xen/pciback/pciback_ops.c b/drivers/xen/pciback/pciback_ops.c
index b85b2db..58d09eb 100644
--- a/drivers/xen/pciback/pciback_ops.c
+++ b/drivers/xen/pciback/pciback_ops.c
@@ -6,7 +6,7 @@
#include <linux/module.h>
#include <linux/wait.h>
#include <asm/bitops.h>
-#include <xen/evtchn.h>
+#include <xen/events.h>
#include "pciback.h"
int verbose_request = 0;
diff --git a/drivers/xen/pciback/xenbus.c b/drivers/xen/pciback/xenbus.c
index 4d56c45..bbca3fe 100644
--- a/drivers/xen/pciback/xenbus.c
+++ b/drivers/xen/pciback/xenbus.c
@@ -8,7 +8,7 @@
#include <linux/list.h>
#include <linux/vmalloc.h>
#include <xen/xenbus.h>
-#include <xen/evtchn.h>
+#include <xen/events.h>
#include "pciback.h"
#define INVALID_EVTCHN_IRQ (-1)
--
1.7.4
From f6222ae41f2fee3f67983f833ee8dcba2c7a1362 Mon Sep 17 00:00:00 2001
From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Date: Tue, 13 Oct 2009 17:22:24 -0400
Subject: [PATCH 014/197] xen-pciback: Use pci_is_enabled() instead of is_enabled.
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
drivers/xen/pciback/conf_space_header.c | 4 ++--
drivers/xen/pciback/pciback_ops.c | 1 -
2 files changed, 2 insertions(+), 3 deletions(-)
diff --git a/drivers/xen/pciback/conf_space_header.c b/drivers/xen/pciback/conf_space_header.c
index f794e12..5a9e028 100644
--- a/drivers/xen/pciback/conf_space_header.c
+++ b/drivers/xen/pciback/conf_space_header.c
@@ -22,14 +22,14 @@ static int command_write(struct pci_dev *dev, int offset, u16 value, void *data)
{
int err;
- if (!dev->is_enabled && is_enable_cmd(value)) {
+ if (!pci_is_enabled(dev) && is_enable_cmd(value)) {
if (unlikely(verbose_request))
printk(KERN_DEBUG "pciback: %s: enable\n",
pci_name(dev));
err = pci_enable_device(dev);
if (err)
return err;
- } else if (dev->is_enabled && !is_enable_cmd(value)) {
+ } else if (pci_is_enabled(dev) && !is_enable_cmd(value)) {
if (unlikely(verbose_request))
printk(KERN_DEBUG "pciback: %s: disable\n",
pci_name(dev));
diff --git a/drivers/xen/pciback/pciback_ops.c b/drivers/xen/pciback/pciback_ops.c
index 58d09eb..2d570e7 100644
--- a/drivers/xen/pciback/pciback_ops.c
+++ b/drivers/xen/pciback/pciback_ops.c
@@ -26,7 +26,6 @@ void pciback_reset_device(struct pci_dev *dev)
pci_write_config_word(dev, PCI_COMMAND, 0);
- dev->is_enabled = 0;
dev->is_busmaster = 0;
} else {
pci_read_config_word(dev, PCI_COMMAND, &cmd);
--
1.7.4
From 0d379d03a3284e4b4d890b7e1b8163d485cc72e6 Mon Sep 17 00:00:00 2001
From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Date: Tue, 13 Oct 2009 17:22:25 -0400
Subject: [PATCH 015/197] xen-pciback: Fix usage of INIT_WORK.
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
drivers/xen/pciback/pciback.h | 4 ++--
drivers/xen/pciback/pciback_ops.c | 7 ++++---
drivers/xen/pciback/xenbus.c | 3 ++-
3 files changed, 8 insertions(+), 6 deletions(-)
diff --git a/drivers/xen/pciback/pciback.h b/drivers/xen/pciback/pciback.h
index 6744f45..4fb8c05 100644
--- a/drivers/xen/pciback/pciback.h
+++ b/drivers/xen/pciback/pciback.h
@@ -99,8 +99,8 @@ int pciback_publish_pci_roots(struct pciback_device *pdev,
void pciback_release_devices(struct pciback_device *pdev);
/* Handles events from front-end */
-irqreturn_t pciback_handle_event(int irq, void *dev_id, struct pt_regs *regs);
-void pciback_do_op(void *data);
+irqreturn_t pciback_handle_event(int irq, void *dev_id);
+void pciback_do_op(struct work_struct *data);
int pciback_xenbus_register(void);
void pciback_xenbus_unregister(void);
diff --git a/drivers/xen/pciback/pciback_ops.c b/drivers/xen/pciback/pciback_ops.c
index 2d570e7..6624faf 100644
--- a/drivers/xen/pciback/pciback_ops.c
+++ b/drivers/xen/pciback/pciback_ops.c
@@ -66,9 +66,10 @@ void test_and_schedule_op(struct pciback_device *pdev)
* context because some of the pci_* functions can sleep (mostly due to ACPI
* use of semaphores). This function is intended to be called from a work
* queue in process context taking a struct pciback_device as a parameter */
-void pciback_do_op(void *data)
+
+void pciback_do_op(struct work_struct *data)
{
- struct pciback_device *pdev = data;
+ struct pciback_device *pdev = container_of(data, struct pciback_device, op_work);
struct pci_dev *dev;
struct xen_pci_op *op = &pdev->sh_info->op;
@@ -123,7 +124,7 @@ void pciback_do_op(void *data)
test_and_schedule_op(pdev);
}
-irqreturn_t pciback_handle_event(int irq, void *dev_id, struct pt_regs *regs)
+irqreturn_t pciback_handle_event(int irq, void *dev_id)
{
struct pciback_device *pdev = dev_id;
diff --git a/drivers/xen/pciback/xenbus.c b/drivers/xen/pciback/xenbus.c
index bbca3fe..bd52289 100644
--- a/drivers/xen/pciback/xenbus.c
+++ b/drivers/xen/pciback/xenbus.c
@@ -9,6 +9,7 @@
#include <linux/vmalloc.h>
#include <xen/xenbus.h>
#include <xen/events.h>
+#include <linux/workqueue.h>
#include "pciback.h"
#define INVALID_EVTCHN_IRQ (-1)
@@ -33,7 +34,7 @@ static struct pciback_device *alloc_pdev(struct xenbus_device *xdev)
pdev->evtchn_irq = INVALID_EVTCHN_IRQ;
pdev->be_watching = 0;
- INIT_WORK(&pdev->op_work, pciback_do_op, pdev);
+ INIT_WORK(&pdev->op_work, pciback_do_op);
if (pciback_init_devices(pdev)) {
kfree(pdev);
--
1.7.4
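
The INIT_WORK change above follows the 2.6.20-era workqueue API, where the handler receives the work_struct itself and recovers its enclosing object with container_of() instead of being handed a private data pointer. A small stand-alone sketch of that pattern (work_struct and container_of here are simplified stand-ins, not the kernel definitions):

/* Sketch of the container_of pattern behind the INIT_WORK change above.
 * Illustrative only; work_struct and container_of are simplified stand-ins. */
#include <stddef.h>
#include <stdio.h>

struct work_struct {
	void (*func)(struct work_struct *work);
};

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct pciback_device {
	int evtchn_irq;
	struct work_struct op_work;	/* embedded, as in the real driver */
};

/* New-style handler: the work item itself is the argument, and the
 * device is recovered from the embedded member. */
static void do_op(struct work_struct *work)
{
	struct pciback_device *pdev =
		container_of(work, struct pciback_device, op_work);

	printf("servicing op for pdev with irq %d\n", pdev->evtchn_irq);
}

int main(void)
{
	struct pciback_device pdev = { .evtchn_irq = 42 };

	pdev.op_work.func = do_op;	/* INIT_WORK(&pdev->op_work, do_op) */
	pdev.op_work.func(&pdev.op_work);
	return 0;
}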
From 57f6c49d0f428f96cca49147d68b0bb6156613a6 Mon Sep 17 00:00:00 2001
From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Date: Tue, 13 Oct 2009 17:22:26 -0400
Subject: [PATCH 016/197] xen-pciback: Update the calling mechanism for xenbus_[map|unmap]_ring_valloc functions.
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
drivers/xen/pciback/pciback.h | 1 -
drivers/xen/pciback/xenbus.c | 18 +++++++++---------
2 files changed, 9 insertions(+), 10 deletions(-)
diff --git a/drivers/xen/pciback/pciback.h b/drivers/xen/pciback/pciback.h
index 4fb8c05..5e8e14e 100644
--- a/drivers/xen/pciback/pciback.h
+++ b/drivers/xen/pciback/pciback.h
@@ -36,7 +36,6 @@ struct pciback_device {
int evtchn_irq;
- struct vm_struct *sh_area;
struct xen_pci_sharedinfo *sh_info;
unsigned long flags;
diff --git a/drivers/xen/pciback/xenbus.c b/drivers/xen/pciback/xenbus.c
index bd52289..5be1350 100644
--- a/drivers/xen/pciback/xenbus.c
+++ b/drivers/xen/pciback/xenbus.c
@@ -7,6 +7,7 @@
#include <linux/init.h>
#include <linux/list.h>
#include <linux/vmalloc.h>
+#include <linux/workqueue.h>
#include <xen/xenbus.h>
#include <xen/events.h>
#include <linux/workqueue.h>
@@ -29,7 +30,6 @@ static struct pciback_device *alloc_pdev(struct xenbus_device *xdev)
spin_lock_init(&pdev->dev_lock);
- pdev->sh_area = NULL;
pdev->sh_info = NULL;
pdev->evtchn_irq = INVALID_EVTCHN_IRQ;
pdev->be_watching = 0;
@@ -59,7 +59,7 @@ static void pciback_disconnect(struct pciback_device *pdev)
flush_workqueue(pciback_wq);
if (pdev->sh_info != NULL) {
- xenbus_unmap_ring_vfree(pdev->xdev, pdev->sh_area);
+ xenbus_unmap_ring_vfree(pdev->xdev, pdev->sh_info);
pdev->sh_info = NULL;
}
@@ -85,23 +85,23 @@ static int pciback_do_attach(struct pciback_device *pdev, int gnt_ref,
int remote_evtchn)
{
int err = 0;
- struct vm_struct *area;
+ void *vaddr;
dev_dbg(&pdev->xdev->dev,
"Attaching to frontend resources - gnt_ref=%d evtchn=%d\n",
gnt_ref, remote_evtchn);
- area = xenbus_map_ring_valloc(pdev->xdev, gnt_ref);
- if (IS_ERR(area)) {
- err = PTR_ERR(area);
+ err = xenbus_map_ring_valloc(pdev->xdev, gnt_ref, &vaddr);
+ if (err < 0) {
+ xenbus_dev_fatal(pdev->xdev, err,
+ "Error mapping other domain page in ours.");
goto out;
}
- pdev->sh_area = area;
- pdev->sh_info = area->addr;
+ pdev->sh_info = vaddr;
err = bind_interdomain_evtchn_to_irqhandler(
pdev->xdev->otherend_id, remote_evtchn, pciback_handle_event,
- SA_SAMPLE_RANDOM, "pciback", pdev);
+ 0, "pciback", pdev);
if (err < 0) {
xenbus_dev_fatal(pdev->xdev, err,
"Error binding event channel to IRQ");
--
1.7.4
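
With the calling convention above, xenbus_map_ring_valloc() reports errors through its return value and hands the mapped address back through a pointer, and xenbus_unmap_ring_vfree() takes that address directly, so the driver no longer needs to keep a vm_struct around. A minimal sketch of the updated pairing, assuming the xenbus API of this series (<xen/xenbus.h> plus the driver's own pciback.h) and with error reporting trimmed:

/* Sketch of the updated xenbus ring map/unmap pairing from the patch above.
 * Assumes the xenbus API of this kernel series; not a stand-alone program. */
static int map_shared_page(struct xenbus_device *xdev, int gnt_ref,
			   struct xen_pci_sharedinfo **sh_info)
{
	void *vaddr;
	int err;

	err = xenbus_map_ring_valloc(xdev, gnt_ref, &vaddr);
	if (err < 0)
		return err;		/* no vm_struct to keep around anymore */

	*sh_info = vaddr;
	return 0;
}

static void unmap_shared_page(struct xenbus_device *xdev,
			      struct xen_pci_sharedinfo *sh_info)
{
	/* The unmap call now takes the mapped virtual address itself. */
	xenbus_unmap_ring_vfree(xdev, sh_info);
}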
From 6e86fcb926e41fb55f512972603e5aaf77e2efb8 Mon Sep 17 00:00:00 2001
From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Date: Tue, 13 Oct 2009 17:22:30 -0400
Subject: [PATCH 017/197] xen-pciback: Add check to load only under privileged domain.
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
drivers/xen/pciback/pci_stub.c | 5 +++++
drivers/xen/pciback/xenbus.c | 2 --
2 files changed, 5 insertions(+), 2 deletions(-)
diff --git a/drivers/xen/pciback/pci_stub.c b/drivers/xen/pciback/pci_stub.c
index c02f21f..d97dac5 100644
--- a/drivers/xen/pciback/pci_stub.c
+++ b/drivers/xen/pciback/pci_stub.c
@@ -14,6 +14,8 @@
#include <linux/wait.h>
#include <asm/atomic.h>
#include <xen/events.h>
+#include <asm/xen/pci.h>
+#include <asm/xen/hypervisor.h>
#include "pciback.h"
#include "conf_space.h"
#include "conf_space_quirks.h"
@@ -1286,6 +1288,9 @@ static int __init pciback_init(void)
{
int err;
+ if (!xen_initial_domain())
+ return -ENODEV;
+
err = pciback_config_init();
if (err)
return err;
diff --git a/drivers/xen/pciback/xenbus.c b/drivers/xen/pciback/xenbus.c
index 5be1350..a85c413 100644
--- a/drivers/xen/pciback/xenbus.c
+++ b/drivers/xen/pciback/xenbus.c
@@ -693,8 +693,6 @@ static struct xenbus_driver xenbus_pciback_driver = {
int __init pciback_xenbus_register(void)
{
- if (!is_running_on_xen())
- return -ENODEV;
pciback_wq = create_workqueue("pciback_workqueue");
if (!pciback_wq) {
printk(KERN_ERR "pciback_xenbus_register: create"
--
1.7.4

From c1139f912c1336538e51966d56e5905954052cba Mon Sep 17 00:00:00 2001
From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Date: Tue, 13 Oct 2009 17:22:31 -0400
Subject: [PATCH 018/197] xen-pciback: Remove usage of pci_restore_bars() as Linux handles the power-up states correctly now.
Originally this code was pulled from the upstream kernel, and stuck
in the linux-2.6-sparse tree. At that time, the Linux tree (2.6.16?)
did not know how to handle this. Nowadays the pci_set_power_state routine
handles this case so we do not need this anymore.
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
drivers/xen/pciback/conf_space_capability_pm.c | 13 -------------
1 files changed, 0 insertions(+), 13 deletions(-)
diff --git a/drivers/xen/pciback/conf_space_capability_pm.c b/drivers/xen/pciback/conf_space_capability_pm.c
index e2f99c7..e1d3af4 100644
--- a/drivers/xen/pciback/conf_space_capability_pm.c
+++ b/drivers/xen/pciback/conf_space_capability_pm.c
@@ -58,19 +58,6 @@ static int pm_ctrl_write(struct pci_dev *dev, int offset, u16 new_value,
goto out;
}
- /*
- * Device may lose PCI config info on D3->D0 transition. This
- * is a problem for some guests which will not reset BARs. Even
- * those that have a go will be foiled by our BAR-write handler
- * which will discard the write! Since Linux won't re-init
- * the config space automatically in all cases, we do it here.
- * Future: Should we re-initialise all first 64 bytes of config space?
- */
- if (new_state == PCI_D0 &&
- (old_state == PCI_D3hot || old_state == PCI_D3cold) &&
- !(old_value & PCI_PM_CTRL_NO_SOFT_RESET))
- pci_restore_bars(dev);
-
out:
return err;
}
--
1.7.4
From 721657d92623cfcf2f6f68c14abf97eb40fa6b20 Mon Sep 17 00:00:00 2001
From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Date: Tue, 13 Oct 2009 17:22:32 -0400
Subject: [PATCH 019/197] xen-pciback: Enable Xen-PCI-back to be compiled.
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
drivers/xen/Kconfig | 65 ++++++++++++++++++++++++++++++++++++++++++++++++++
drivers/xen/Makefile | 1 +
2 files changed, 66 insertions(+), 0 deletions(-)
diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig
index 6e6180c..d874453 100644
--- a/drivers/xen/Kconfig
+++ b/drivers/xen/Kconfig
@@ -29,6 +29,71 @@ config XEN_DEV_EVTCHN
Support for backend device drivers that provide I/O services
to other virtual machines.
+config XEN_PCIDEV_BACKEND
+ tristate "PCI-device backend driver"
+ depends on PCI
+ depends on XEN_BACKEND
+ help
+ The PCI device backend driver allows the kernel to export arbitrary
+ PCI devices to other guests. If you select this to be a module, you
+ will need to make sure no other driver has bound to the device(s)
+ you want to make visible to other guests.
+
+choice
+ prompt "PCI Backend Mode"
+ depends on XEN_PCIDEV_BACKEND
+ default XEN_PCIDEV_BACKEND_VPCI if !IA64
+ default XEN_PCIDEV_BACKEND_CONTROLLER if IA64
+
+config XEN_PCIDEV_BACKEND_VPCI
+ bool "Virtual PCI"
+ ---help---
+ This PCI Backend hides the true PCI topology and makes the frontend
+ think there is a single PCI bus with only the exported devices on it.
+ For example, a device at 03:05.0 will be re-assigned to 00:00.0. A
+ second device at 02:1a.1 will be re-assigned to 00:01.1.
+
+config XEN_PCIDEV_BACKEND_PASS
+ bool "Passthrough"
+ ---help---
+ This PCI Backend provides a real view of the PCI topology to the
+ frontend (for example, a device at 06:01.b will still appear at
+ 06:01.b to the frontend). This is similar to how Xen 2.0.x exposed
+ PCI devices to its driver domains. This may be required for drivers
+ which depend on finding their hardware in certain bus/slot
+ locations.
+
+config XEN_PCIDEV_BACKEND_SLOT
+ bool "Slot"
+ ---help---
+ This PCI Backend hides the true PCI topology and makes the frontend
+ think there is a single PCI bus with only the exported devices on it.
+ Contrary to the virtual PCI backend, a function becomes a new slot.
+ For example, a device at 03:05.2 will be re-assigned to 00:00.0. A
+ second device at 02:1a.1 will be re-assigned to 00:01.0.
+
+config XEN_PCIDEV_BACKEND_CONTROLLER
+ bool "Controller"
+ depends on IA64
+ ---help---
+ This PCI backend virtualizes the PCI bus topology by providing a
+ virtual bus per PCI root device. Devices which are physically under
+ the same root bus will appear on the same virtual bus. For systems
+ with complex I/O addressing, this is the only backend which supports
+ extended I/O port spaces and MMIO translation offsets. This backend
+ also supports slot virtualization. For example, a device at
+ 0000:01:02.1 will be re-assigned to 0000:00:00.0. A second device
+ at 0000:02:05.0 (behind a P2P bridge on bus 0000:01) will be
+ re-assigned to 0000:00:01.0. A third device at 0000:16:05.0 (under
+ a different PCI root bus) will be re-assigned to 0000:01:00.0.
+
+endchoice
+
+config XEN_PCIDEV_BE_DEBUG
+ bool "PCI Backend Debugging"
+ depends on XEN_PCIDEV_BACKEND
+
+
config XENFS
tristate "Xen filesystem"
default y
diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile
index eb8a78d..3737dee 100644
--- a/drivers/xen/Makefile
+++ b/drivers/xen/Makefile
@@ -9,6 +9,7 @@ obj-$(CONFIG_HOTPLUG_CPU) += cpu_hotplug.o
obj-$(CONFIG_XEN_BALLOON) += balloon.o
obj-$(CONFIG_XEN_DEV_EVTCHN) += xen-evtchn.o
obj-$(CONFIG_XEN_GNTDEV) += xen-gntdev.o
+obj-$(CONFIG_XEN_PCIDEV_BACKEND) += pciback/
obj-$(CONFIG_XENFS) += xenfs/
obj-$(CONFIG_XEN_SYS_HYPERVISOR) += sys-hypervisor.o
obj-$(CONFIG_XEN_PLATFORM_PCI) += platform-pci.o
--
1.7.4
From c164cd8577017d1c4e001b475fadddc7d2ff5c78 Mon Sep 17 00:00:00 2001
From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Date: Thu, 5 Nov 2009 15:25:43 -0500
Subject: [PATCH 020/197] xen-pciback: Return the physical IRQ number instead of the allocated IRQ number to pcifront.
The allocation of IRQ numbers in Linux privileged domains is based
on finding the first unbound IRQ number. Once the allocation is done,
a hypercall to Xen allocates a PIRQ globally.
That PIRQ->IRQ binding is saved in data structures that are used
during ISR execution.
Before this patch, for non-privileged domains we would return the local
IRQ number instead of the PIRQ. The non-privileged domains require the
PIRQ so that they can attach their own interrupt handler to it.
Fortunately there is a function, 'xen_gsi_from_irq', that returns
that global IRQ number.
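As a rough sketch of the pattern this change applies (the helper name
below is illustrative only, not part of the patch):

    static void report_irq_to_frontend(struct pci_dev *dev,
                                       struct xen_pci_op *op)
    {
            /* dev->irq is local to this (privileged) domain; the
             * frontend needs the global GSI/PIRQ, so translate it. */
            op->value = xen_gsi_from_irq(dev->irq);
    }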
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
drivers/xen/pciback/conf_space_capability_msi.c | 12 ++++++++----
1 files changed, 8 insertions(+), 4 deletions(-)
diff --git a/drivers/xen/pciback/conf_space_capability_msi.c b/drivers/xen/pciback/conf_space_capability_msi.c
index 762e396..7fb5371 100644
--- a/drivers/xen/pciback/conf_space_capability_msi.c
+++ b/drivers/xen/pciback/conf_space_capability_msi.c
@@ -6,6 +6,7 @@
#include "conf_space.h"
#include "conf_space_capability.h"
#include <xen/interface/io/pciif.h>
+#include <xen/events.h>
#include "pciback.h"
int pciback_enable_msi(struct pciback_device *pdev,
@@ -22,7 +23,9 @@ int pciback_enable_msi(struct pciback_device *pdev,
return XEN_PCI_ERR_op_failed;
}
- op->value = dev->irq;
+	/* The value the guest needs is actually the IDT vector, not
+	 * the local domain's IRQ number. */
+ op->value = xen_gsi_from_irq(dev->irq);
return 0;
}
@@ -31,7 +34,7 @@ int pciback_disable_msi(struct pciback_device *pdev,
{
pci_disable_msi(dev);
- op->value = dev->irq;
+ op->value = xen_gsi_from_irq(dev->irq);
return 0;
}
@@ -57,7 +60,8 @@ int pciback_enable_msix(struct pciback_device *pdev,
for (i = 0; i < op->value; i++) {
op->msix_entries[i].entry = entries[i].entry;
- op->msix_entries[i].vector = entries[i].vector;
+ op->msix_entries[i].vector =
+ xen_gsi_from_irq(entries[i].vector);
}
kfree(entries);
@@ -73,7 +77,7 @@ int pciback_disable_msix(struct pciback_device *pdev,
pci_disable_msix(dev);
- op->value = dev->irq;
+ op->value = xen_gsi_from_irq(dev->irq);
return 0;
}
--
1.7.4
From b0b035f1de3282aa96a6dc28007d513e8fce793d Mon Sep 17 00:00:00 2001
From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Date: Thu, 5 Nov 2009 15:25:44 -0500
Subject: [PATCH 021/197] xen-pciback: Fix checkpatch warnings and errors for pciback/ directory.
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
drivers/xen/pciback/conf_space.c | 34 +++---
drivers/xen/pciback/conf_space.h | 30 ++--
drivers/xen/pciback/conf_space_capability.c | 5 +-
drivers/xen/pciback/conf_space_capability.h | 3 +
drivers/xen/pciback/conf_space_capability_msi.c | 3 +-
drivers/xen/pciback/conf_space_capability_pm.c | 4 +-
drivers/xen/pciback/conf_space_capability_vpd.c | 2 +-
drivers/xen/pciback/conf_space_header.c | 7 +-
drivers/xen/pciback/conf_space_quirks.c | 16 ++-
drivers/xen/pciback/controller.c | 15 +-
drivers/xen/pciback/passthrough.c | 6 +-
drivers/xen/pciback/pci_stub.c | 165 +++++++++++------------
drivers/xen/pciback/pciback.h | 28 +++--
drivers/xen/pciback/pciback_ops.c | 74 +++++------
drivers/xen/pciback/slot.c | 22 ++--
drivers/xen/pciback/vpci.c | 28 ++--
drivers/xen/pciback/xenbus.c | 42 +++---
17 files changed, 245 insertions(+), 239 deletions(-)
diff --git a/drivers/xen/pciback/conf_space.c b/drivers/xen/pciback/conf_space.c
index 0c76db1..370c18e 100644
--- a/drivers/xen/pciback/conf_space.c
+++ b/drivers/xen/pciback/conf_space.c
@@ -18,11 +18,11 @@
static int permissive;
module_param(permissive, bool, 0644);
-#define DEFINE_PCI_CONFIG(op,size,type) \
+#define DEFINE_PCI_CONFIG(op, size, type) \
int pciback_##op##_config_##size \
(struct pci_dev *dev, int offset, type value, void *data) \
{ \
- return pci_##op##_config_##size (dev, offset, value); \
+ return pci_##op##_config_##size(dev, offset, value); \
}
DEFINE_PCI_CONFIG(read, byte, u8 *)
@@ -139,14 +139,15 @@ static int pcibios_err_to_errno(int err)
}
int pciback_config_read(struct pci_dev *dev, int offset, int size,
- u32 * ret_val)
+ u32 *ret_val)
{
int err = 0;
struct pciback_dev_data *dev_data = pci_get_drvdata(dev);
const struct config_field_entry *cfg_entry;
const struct config_field *field;
int req_start, req_end, field_start, field_end;
- /* if read fails for any reason, return 0 (as if device didn't respond) */
+ /* if read fails for any reason, return 0
+ * (as if device didn't respond) */
u32 value = 0, tmp_val;
if (unlikely(verbose_request))
@@ -161,10 +162,10 @@ int pciback_config_read(struct pci_dev *dev, int offset, int size,
/* Get the real value first, then modify as appropriate */
switch (size) {
case 1:
- err = pci_read_config_byte(dev, offset, (u8 *) & value);
+ err = pci_read_config_byte(dev, offset, (u8 *) &value);
break;
case 2:
- err = pci_read_config_word(dev, offset, (u16 *) & value);
+ err = pci_read_config_word(dev, offset, (u16 *) &value);
break;
case 4:
err = pci_read_config_dword(dev, offset, &value);
@@ -192,7 +193,7 @@ int pciback_config_read(struct pci_dev *dev, int offset, int size,
}
}
- out:
+out:
if (unlikely(verbose_request))
printk(KERN_DEBUG "pciback: %s: read %d bytes at 0x%x = %x\n",
pci_name(dev), size, offset, value);
@@ -276,8 +277,8 @@ int pciback_config_write(struct pci_dev *dev, int offset, int size, u32 value)
} else if (!dev_data->warned_on_write) {
dev_data->warned_on_write = 1;
dev_warn(&dev->dev, "Driver tried to write to a "
- "read-only configuration space field at offset "
- "0x%x, size %d. This may be harmless, but if "
+ "read-only configuration space field at offset"
+ " 0x%x, size %d. This may be harmless, but if "
"you have problems with your device:\n"
"1) see permissive attribute in sysfs\n"
"2) report problems to the xen-devel "
@@ -295,8 +296,8 @@ void pciback_config_free_dyn_fields(struct pci_dev *dev)
struct config_field_entry *cfg_entry, *t;
const struct config_field *field;
- dev_dbg(&dev->dev,
- "free-ing dynamically allocated virtual configuration space fields\n");
+ dev_dbg(&dev->dev, "free-ing dynamically allocated virtual "
+ "configuration space fields\n");
if (!dev_data)
return;
@@ -306,8 +307,7 @@ void pciback_config_free_dyn_fields(struct pci_dev *dev)
if (field->clean) {
field->clean((struct config_field *)field);
- if (cfg_entry->data)
- kfree(cfg_entry->data);
+ kfree(cfg_entry->data);
list_del(&cfg_entry->list);
kfree(cfg_entry);
@@ -376,7 +376,7 @@ int pciback_config_add_field_offset(struct pci_dev *dev,
cfg_entry->base_offset = base_offset;
/* silently ignore duplicate fields */
- err = pciback_field_is_dup(dev,OFFSET(cfg_entry));
+ err = pciback_field_is_dup(dev, OFFSET(cfg_entry));
if (err)
goto out;
@@ -395,14 +395,14 @@ int pciback_config_add_field_offset(struct pci_dev *dev,
OFFSET(cfg_entry));
list_add_tail(&cfg_entry->list, &dev_data->config_fields);
- out:
+out:
if (err)
kfree(cfg_entry);
return err;
}
-/* This sets up the device's virtual configuration space to keep track of
+/* This sets up the device's virtual configuration space to keep track of
* certain registers (like the base address registers (BARs) so that we can
* keep the client from manipulating them directly.
*/
@@ -425,7 +425,7 @@ int pciback_config_init_dev(struct pci_dev *dev)
err = pciback_config_quirks_init(dev);
- out:
+out:
return err;
}
diff --git a/drivers/xen/pciback/conf_space.h b/drivers/xen/pciback/conf_space.h
index fe746ef..50ebef2 100644
--- a/drivers/xen/pciback/conf_space.h
+++ b/drivers/xen/pciback/conf_space.h
@@ -11,21 +11,21 @@
#include <linux/err.h>
/* conf_field_init can return an errno in a ptr with ERR_PTR() */
-typedef void *(*conf_field_init) (struct pci_dev * dev, int offset);
-typedef void (*conf_field_reset) (struct pci_dev * dev, int offset, void *data);
-typedef void (*conf_field_free) (struct pci_dev * dev, int offset, void *data);
+typedef void *(*conf_field_init) (struct pci_dev *dev, int offset);
+typedef void (*conf_field_reset) (struct pci_dev *dev, int offset, void *data);
+typedef void (*conf_field_free) (struct pci_dev *dev, int offset, void *data);
-typedef int (*conf_dword_write) (struct pci_dev * dev, int offset, u32 value,
+typedef int (*conf_dword_write) (struct pci_dev *dev, int offset, u32 value,
void *data);
-typedef int (*conf_word_write) (struct pci_dev * dev, int offset, u16 value,
+typedef int (*conf_word_write) (struct pci_dev *dev, int offset, u16 value,
void *data);
-typedef int (*conf_byte_write) (struct pci_dev * dev, int offset, u8 value,
+typedef int (*conf_byte_write) (struct pci_dev *dev, int offset, u8 value,
void *data);
-typedef int (*conf_dword_read) (struct pci_dev * dev, int offset, u32 * value,
+typedef int (*conf_dword_read) (struct pci_dev *dev, int offset, u32 *value,
void *data);
-typedef int (*conf_word_read) (struct pci_dev * dev, int offset, u16 * value,
+typedef int (*conf_word_read) (struct pci_dev *dev, int offset, u16 *value,
void *data);
-typedef int (*conf_byte_read) (struct pci_dev * dev, int offset, u8 * value,
+typedef int (*conf_byte_read) (struct pci_dev *dev, int offset, u8 *value,
void *data);
/* These are the fields within the configuration space which we
@@ -39,7 +39,7 @@ struct config_field {
conf_field_init init;
conf_field_reset reset;
conf_field_free release;
- void (*clean) (struct config_field * field);
+ void (*clean) (struct config_field *field);
union {
struct {
conf_dword_write write;
@@ -92,8 +92,8 @@ static inline int pciback_config_add_fields(struct pci_dev *dev,
}
static inline int pciback_config_add_fields_offset(struct pci_dev *dev,
- const struct config_field *field,
- unsigned int offset)
+ const struct config_field *field,
+ unsigned int offset)
{
int i, err = 0;
for (i = 0; field[i].size != 0; i++) {
@@ -105,11 +105,11 @@ static inline int pciback_config_add_fields_offset(struct pci_dev *dev,
}
/* Read/Write the real configuration space */
-int pciback_read_config_byte(struct pci_dev *dev, int offset, u8 * value,
+int pciback_read_config_byte(struct pci_dev *dev, int offset, u8 *value,
void *data);
-int pciback_read_config_word(struct pci_dev *dev, int offset, u16 * value,
+int pciback_read_config_word(struct pci_dev *dev, int offset, u16 *value,
void *data);
-int pciback_read_config_dword(struct pci_dev *dev, int offset, u32 * value,
+int pciback_read_config_dword(struct pci_dev *dev, int offset, u32 *value,
void *data);
int pciback_write_config_byte(struct pci_dev *dev, int offset, u8 value,
void *data);
diff --git a/drivers/xen/pciback/conf_space_capability.c b/drivers/xen/pciback/conf_space_capability.c
index 50efca4..0ea84d6 100644
--- a/drivers/xen/pciback/conf_space_capability.c
+++ b/drivers/xen/pciback/conf_space_capability.c
@@ -53,13 +53,10 @@ int pciback_config_capability_add_fields(struct pci_dev *dev)
}
}
- out:
+out:
return err;
}
-extern struct pciback_config_capability pciback_config_capability_vpd;
-extern struct pciback_config_capability pciback_config_capability_pm;
-
int pciback_config_capability_init(void)
{
register_capability(&pciback_config_capability_vpd);
diff --git a/drivers/xen/pciback/conf_space_capability.h b/drivers/xen/pciback/conf_space_capability.h
index 823392e..8da3ac4 100644
--- a/drivers/xen/pciback/conf_space_capability.h
+++ b/drivers/xen/pciback/conf_space_capability.h
@@ -20,4 +20,7 @@ struct pciback_config_capability {
const struct config_field *fields;
};
+extern struct pciback_config_capability pciback_config_capability_vpd;
+extern struct pciback_config_capability pciback_config_capability_pm;
+
#endif
diff --git a/drivers/xen/pciback/conf_space_capability_msi.c b/drivers/xen/pciback/conf_space_capability_msi.c
index 7fb5371..b70ea8b 100644
--- a/drivers/xen/pciback/conf_space_capability_msi.c
+++ b/drivers/xen/pciback/conf_space_capability_msi.c
@@ -18,7 +18,8 @@ int pciback_enable_msi(struct pciback_device *pdev,
status = pci_enable_msi(dev);
if (status) {
- printk("error enable msi for guest %x status %x\n", otherend, status);
+ printk(KERN_ERR "error enable msi for guest %x status %x\n",
+ otherend, status);
op->value = 0;
return XEN_PCI_ERR_op_failed;
}
diff --git a/drivers/xen/pciback/conf_space_capability_pm.c b/drivers/xen/pciback/conf_space_capability_pm.c
index e1d3af4..0442616 100644
--- a/drivers/xen/pciback/conf_space_capability_pm.c
+++ b/drivers/xen/pciback/conf_space_capability_pm.c
@@ -20,7 +20,7 @@ static int pm_caps_read(struct pci_dev *dev, int offset, u16 *value,
*value = real_value & ~PCI_PM_CAP_PME_MASK;
- out:
+out:
return err;
}
@@ -77,7 +77,7 @@ static void *pm_ctrl_init(struct pci_dev *dev, int offset)
err = pci_write_config_word(dev, offset, value);
}
- out:
+out:
return ERR_PTR(err);
}
diff --git a/drivers/xen/pciback/conf_space_capability_vpd.c b/drivers/xen/pciback/conf_space_capability_vpd.c
index 920cb4a..e7b4d66 100644
--- a/drivers/xen/pciback/conf_space_capability_vpd.c
+++ b/drivers/xen/pciback/conf_space_capability_vpd.c
@@ -33,7 +33,7 @@ static const struct config_field caplist_vpd[] = {
},
{}
};
-
+
struct pciback_config_capability pciback_config_capability_vpd = {
.capability = PCI_CAP_ID_VPD,
.fields = caplist_vpd,
diff --git a/drivers/xen/pciback/conf_space_header.c b/drivers/xen/pciback/conf_space_header.c
index 5a9e028..3ae7da1 100644
--- a/drivers/xen/pciback/conf_space_header.c
+++ b/drivers/xen/pciback/conf_space_header.c
@@ -51,7 +51,8 @@ static int command_write(struct pci_dev *dev, int offset, u16 value, void *data)
err = pci_set_mwi(dev);
if (err) {
printk(KERN_WARNING
- "pciback: %s: cannot enable memory-write-invalidate (%d)\n",
+ "pciback: %s: cannot enable "
+ "memory-write-invalidate (%d)\n",
pci_name(dev), err);
value &= ~PCI_COMMAND_INVALIDATE;
}
@@ -206,7 +207,7 @@ static int bist_write(struct pci_dev *dev, int offset, u8 value, void *data)
|| value == PCI_BIST_START)
err = pci_write_config_byte(dev, offset, value);
- out:
+out:
return err;
}
@@ -312,6 +313,6 @@ int pciback_config_header_add_fields(struct pci_dev *dev)
break;
}
- out:
+out:
return err;
}
diff --git a/drivers/xen/pciback/conf_space_quirks.c b/drivers/xen/pciback/conf_space_quirks.c
index 244a438..45c31fb 100644
--- a/drivers/xen/pciback/conf_space_quirks.c
+++ b/drivers/xen/pciback/conf_space_quirks.c
@@ -18,8 +18,10 @@ match_one_device(const struct pci_device_id *id, const struct pci_dev *dev)
{
if ((id->vendor == PCI_ANY_ID || id->vendor == dev->vendor) &&
(id->device == PCI_ANY_ID || id->device == dev->device) &&
- (id->subvendor == PCI_ANY_ID || id->subvendor == dev->subsystem_vendor) &&
- (id->subdevice == PCI_ANY_ID || id->subdevice == dev->subsystem_device) &&
+ (id->subvendor == PCI_ANY_ID ||
+ id->subvendor == dev->subsystem_vendor) &&
+ (id->subdevice == PCI_ANY_ID ||
+ id->subdevice == dev->subsystem_device) &&
!((id->class ^ dev->class) & id->class_mask))
return id;
return NULL;
@@ -35,7 +37,7 @@ struct pciback_config_quirk *pciback_find_quirk(struct pci_dev *dev)
tmp_quirk = NULL;
printk(KERN_DEBUG
"quirk didn't match any device pciback knows about\n");
- out:
+out:
return tmp_quirk;
}
@@ -51,7 +53,7 @@ int pciback_field_is_dup(struct pci_dev *dev, unsigned int reg)
struct config_field_entry *cfg_entry;
list_for_each_entry(cfg_entry, &dev_data->config_fields, list) {
- if ( OFFSET(cfg_entry) == reg) {
+ if (OFFSET(cfg_entry) == reg) {
ret = 1;
break;
}
@@ -84,7 +86,7 @@ int pciback_config_quirks_add_field(struct pci_dev *dev, struct config_field
pciback_config_add_field(dev, field);
- out:
+out:
return err;
}
@@ -110,7 +112,7 @@ int pciback_config_quirks_init(struct pci_dev *dev)
quirk->pdev = dev;
register_quirk(quirk);
- out:
+out:
return ret;
}
@@ -133,6 +135,6 @@ int pciback_config_quirk_release(struct pci_dev *dev)
list_del(&quirk->quirks_list);
kfree(quirk);
- out:
+out:
return ret;
}
diff --git a/drivers/xen/pciback/controller.c b/drivers/xen/pciback/controller.c
index 294e48f..7f04f11 100644
--- a/drivers/xen/pciback/controller.c
+++ b/drivers/xen/pciback/controller.c
@@ -259,7 +259,7 @@ static acpi_status write_xenbus_resource(struct acpi_resource *res, void *data)
!(addr.resource_type == ACPI_IO_RANGE &&
addr.info.io.translation))
return AE_OK;
-
+
/* Store the resource in xenbus for the guest */
len = snprintf(str, sizeof(str), "root-%d-resource-%d",
info->root_num, info->resource_count);
@@ -314,7 +314,7 @@ int pciback_publish_pci_roots(struct pciback_device *pdev,
goto out;
/*
- * Now figure out which root-%d this belongs to
+ * Now figure out which root-%d this belongs to
* so we can associate resources with it.
*/
err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename,
@@ -407,8 +407,8 @@ void pciback_release_devices(struct pciback_device *pdev)
pdev->pci_dev_data = NULL;
}
-int pciback_get_pcifront_dev(struct pci_dev *pcidev,
- struct pciback_device *pdev,
+int pciback_get_pcifront_dev(struct pci_dev *pcidev,
+ struct pciback_device *pdev,
unsigned int *domain, unsigned int *bus, unsigned int *devfn)
{
struct controller_dev_data *dev_data = pdev->pci_dev_data;
@@ -420,13 +420,12 @@ int pciback_get_pcifront_dev(struct pci_dev *pcidev,
list_for_each_entry(cntrl_entry, &dev_data->list, list) {
list_for_each_entry(dev_entry, &cntrl_entry->dev_list, list) {
- if ( (dev_entry->dev->bus->number ==
+ if ((dev_entry->dev->bus->number ==
pcidev->bus->number) &&
- (dev_entry->dev->devfn ==
+ (dev_entry->dev->devfn ==
pcidev->devfn) &&
(pci_domain_nr(dev_entry->dev->bus) ==
- pci_domain_nr(pcidev->bus)))
- {
+ pci_domain_nr(pcidev->bus))) {
found = 1;
*domain = cntrl_entry->domain;
*bus = cntrl_entry->bus;
diff --git a/drivers/xen/pciback/passthrough.c b/drivers/xen/pciback/passthrough.c
index 9e7a0c4..5386bebf 100644
--- a/drivers/xen/pciback/passthrough.c
+++ b/drivers/xen/pciback/passthrough.c
@@ -165,8 +165,10 @@ void pciback_release_devices(struct pciback_device *pdev)
pdev->pci_dev_data = NULL;
}
-int pciback_get_pcifront_dev(struct pci_dev *pcidev, struct pciback_device *pdev,
- unsigned int *domain, unsigned int *bus, unsigned int *devfn)
+int pciback_get_pcifront_dev(struct pci_dev *pcidev,
+ struct pciback_device *pdev,
+ unsigned int *domain, unsigned int *bus,
+ unsigned int *devfn)
{
*domain = pci_domain_nr(pcidev->bus);
diff --git a/drivers/xen/pciback/pci_stub.c b/drivers/xen/pciback/pci_stub.c
index d97dac5..28222ee 100644
--- a/drivers/xen/pciback/pci_stub.c
+++ b/drivers/xen/pciback/pci_stub.c
@@ -20,7 +20,7 @@
#include "conf_space.h"
#include "conf_space_quirks.h"
-static char *pci_devs_to_hide = NULL;
+static char *pci_devs_to_hide;
wait_queue_head_t aer_wait_queue;
/*Add sem for sync AER handling and pciback remove/reconfigue ops,
* We want to avoid in middle of AER ops, pciback devices is being removed
@@ -43,7 +43,7 @@ struct pcistub_device {
spinlock_t lock;
struct pci_dev *dev;
- struct pciback_device *pdev; /* non-NULL if struct pci_dev is in use */
+ struct pciback_device *pdev;/* non-NULL if struct pci_dev is in use */
};
/* Access to pcistub_devices & seized_devices lists and the initialize_devices
@@ -55,7 +55,7 @@ static LIST_HEAD(pcistub_devices);
/* wait for device_initcall before initializing our devices
* (see pcistub_init_devices_late)
*/
-static int initialize_devices = 0;
+static int initialize_devices;
static LIST_HEAD(seized_devices);
static struct pcistub_device *pcistub_device_alloc(struct pci_dev *dev)
@@ -132,7 +132,7 @@ static struct pcistub_device *pcistub_device_find(int domain, int bus,
/* didn't find it */
psdev = NULL;
- out:
+out:
spin_unlock_irqrestore(&pcistub_devices_lock, flags);
return psdev;
}
@@ -321,10 +321,10 @@ static int __devinit pcistub_init_device(struct pci_dev *dev)
return 0;
- config_release:
+config_release:
pciback_config_free_dev(dev);
- out:
+out:
pci_set_drvdata(dev, NULL);
kfree(dev_data);
return err;
@@ -443,7 +443,7 @@ static int __devinit pcistub_probe(struct pci_dev *dev,
/* Didn't find the device */
err = -ENODEV;
- out:
+out:
return err;
}
@@ -511,26 +511,24 @@ static void kill_domain_by_device(struct pcistub_device *psdev)
int err;
char nodename[1024];
- if (!psdev)
+ if (!psdev)
dev_err(&psdev->dev->dev,
"device is NULL when do AER recovery/kill_domain\n");
- sprintf(nodename, "/local/domain/0/backend/pci/%d/0",
+ sprintf(nodename, "/local/domain/0/backend/pci/%d/0",
psdev->pdev->xdev->otherend_id);
nodename[strlen(nodename)] = '\0';
again:
err = xenbus_transaction_start(&xbt);
- if (err)
- {
+ if (err) {
dev_err(&psdev->dev->dev,
"error %d when start xenbus transaction\n", err);
return;
}
/*PV AER handlers will set this flag*/
- xenbus_printf(xbt, nodename, "aerState" , "aerfail" );
+ xenbus_printf(xbt, nodename, "aerState" , "aerfail");
err = xenbus_transaction_end(xbt, 0);
- if (err)
- {
+ if (err) {
if (err == -EAGAIN)
goto again;
dev_err(&psdev->dev->dev,
@@ -541,9 +539,9 @@ again:
/* For each aer recovery step error_detected, mmio_enabled, etc, front_end and
* backend need to have cooperation. In pciback, those steps will do similar
- * jobs: send service request and waiting for front_end response.
+ * jobs: send service request and waiting for front_end response.
*/
-static pci_ers_result_t common_process(struct pcistub_device *psdev,
+static pci_ers_result_t common_process(struct pcistub_device *psdev,
pci_channel_state_t state, int aer_cmd, pci_ers_result_t result)
{
pci_ers_result_t res = result;
@@ -561,12 +559,12 @@ static pci_ers_result_t common_process(struct pcistub_device *psdev,
if (!ret) {
dev_err(&psdev->dev->dev,
"pciback: failed to get pcifront device\n");
- return PCI_ERS_RESULT_NONE;
+ return PCI_ERS_RESULT_NONE;
}
wmb();
- dev_dbg(&psdev->dev->dev,
- "pciback: aer_op %x dom %x bus %x devfn %x\n",
+ dev_dbg(&psdev->dev->dev,
+ "pciback: aer_op %x dom %x bus %x devfn %x\n",
aer_cmd, aer_op->domain, aer_op->bus, aer_op->devfn);
/*local flag to mark there's aer request, pciback callback will use this
* flag to judge whether we need to check pci-front give aer service
@@ -575,21 +573,21 @@ static pci_ers_result_t common_process(struct pcistub_device *psdev,
set_bit(_PCIB_op_pending, (unsigned long *)&psdev->pdev->flags);
/*It is possible that a pcifront conf_read_write ops request invokes
- * the callback which cause the spurious execution of wake_up.
+ * the callback which cause the spurious execution of wake_up.
* Yet it is harmless and better than a spinlock here
*/
- set_bit(_XEN_PCIB_active,
+ set_bit(_XEN_PCIB_active,
(unsigned long *)&psdev->pdev->sh_info->flags);
wmb();
notify_remote_via_irq(psdev->pdev->evtchn_irq);
ret = wait_event_timeout(aer_wait_queue, !(test_bit(_XEN_PCIB_active,
- (unsigned long *)&psdev->pdev->sh_info->flags)), 300*HZ);
+ (unsigned long *)&psdev->pdev->sh_info->flags)), 300*HZ);
if (!ret) {
- if (test_bit(_XEN_PCIB_active,
+ if (test_bit(_XEN_PCIB_active,
(unsigned long *)&psdev->pdev->sh_info->flags)) {
- dev_err(&psdev->dev->dev,
+ dev_err(&psdev->dev->dev,
"pcifront aer process not responding!\n");
clear_bit(_XEN_PCIB_active,
(unsigned long *)&psdev->pdev->sh_info->flags);
@@ -599,16 +597,16 @@ static pci_ers_result_t common_process(struct pcistub_device *psdev,
}
clear_bit(_PCIB_op_pending, (unsigned long *)&psdev->pdev->flags);
- if ( test_bit( _XEN_PCIF_active,
- (unsigned long*)&psdev->pdev->sh_info->flags)) {
- dev_dbg(&psdev->dev->dev,
+ if (test_bit(_XEN_PCIF_active,
+ (unsigned long *)&psdev->pdev->sh_info->flags)) {
+ dev_dbg(&psdev->dev->dev,
"schedule pci_conf service in pciback \n");
test_and_schedule_op(psdev->pdev);
}
res = (pci_ers_result_t)aer_op->err;
return res;
-}
+}
/*
* pciback_slot_reset: it will send the slot_reset request to pcifront in case
@@ -632,24 +630,22 @@ static pci_ers_result_t pciback_slot_reset(struct pci_dev *dev)
PCI_SLOT(dev->devfn),
PCI_FUNC(dev->devfn));
- if ( !psdev || !psdev->pdev )
- {
- dev_err(&dev->dev,
+ if (!psdev || !psdev->pdev) {
+ dev_err(&dev->dev,
"pciback device is not found/assigned\n");
goto end;
}
- if ( !psdev->pdev->sh_info )
- {
+ if (!psdev->pdev->sh_info) {
dev_err(&dev->dev, "pciback device is not connected or owned"
" by HVM, kill it\n");
kill_domain_by_device(psdev);
goto release;
}
- if ( !test_bit(_XEN_PCIB_AERHANDLER,
- (unsigned long *)&psdev->pdev->sh_info->flags) ) {
- dev_err(&dev->dev,
+ if (!test_bit(_XEN_PCIB_AERHANDLER,
+ (unsigned long *)&psdev->pdev->sh_info->flags)) {
+ dev_err(&dev->dev,
"guest with no AER driver should have been killed\n");
goto release;
}
@@ -657,7 +653,7 @@ static pci_ers_result_t pciback_slot_reset(struct pci_dev *dev)
if (result == PCI_ERS_RESULT_NONE ||
result == PCI_ERS_RESULT_DISCONNECT) {
- dev_dbg(&dev->dev,
+ dev_dbg(&dev->dev,
"No AER slot_reset service or disconnected!\n");
kill_domain_by_device(psdev);
}
@@ -670,9 +666,9 @@ end:
}
-/*pciback_mmio_enabled: it will send the mmio_enabled request to pcifront
-* in case of the device driver could provide this service, and then wait
-* for pcifront ack.
+/*pciback_mmio_enabled: it will send the mmio_enabled request to pcifront
+* in case of the device driver could provide this service, and then wait
+* for pcifront ack
* @dev: pointer to PCI devices
* return value is used by aer_core do_recovery policy
*/
@@ -692,24 +688,22 @@ static pci_ers_result_t pciback_mmio_enabled(struct pci_dev *dev)
PCI_SLOT(dev->devfn),
PCI_FUNC(dev->devfn));
- if ( !psdev || !psdev->pdev )
- {
- dev_err(&dev->dev,
+ if (!psdev || !psdev->pdev) {
+ dev_err(&dev->dev,
"pciback device is not found/assigned\n");
goto end;
}
- if ( !psdev->pdev->sh_info )
- {
+ if (!psdev->pdev->sh_info) {
dev_err(&dev->dev, "pciback device is not connected or owned"
" by HVM, kill it\n");
kill_domain_by_device(psdev);
goto release;
}
- if ( !test_bit(_XEN_PCIB_AERHANDLER,
- (unsigned long *)&psdev->pdev->sh_info->flags) ) {
- dev_err(&dev->dev,
+ if (!test_bit(_XEN_PCIB_AERHANDLER,
+ (unsigned long *)&psdev->pdev->sh_info->flags)) {
+ dev_err(&dev->dev,
"guest with no AER driver should have been killed\n");
goto release;
}
@@ -717,7 +711,7 @@ static pci_ers_result_t pciback_mmio_enabled(struct pci_dev *dev)
if (result == PCI_ERS_RESULT_NONE ||
result == PCI_ERS_RESULT_DISCONNECT) {
- dev_dbg(&dev->dev,
+ dev_dbg(&dev->dev,
"No AER mmio_enabled service or disconnected!\n");
kill_domain_by_device(psdev);
}
@@ -728,8 +722,8 @@ end:
return result;
}
-/*pciback_error_detected: it will send the error_detected request to pcifront
-* in case of the device driver could provide this service, and then wait
+/*pciback_error_detected: it will send the error_detected request to pcifront
+* in case of the device driver could provide this service, and then wait
* for pcifront ack.
* @dev: pointer to PCI devices
* @error: the current PCI connection state
@@ -752,15 +746,13 @@ static pci_ers_result_t pciback_error_detected(struct pci_dev *dev,
PCI_SLOT(dev->devfn),
PCI_FUNC(dev->devfn));
- if ( !psdev || !psdev->pdev )
- {
- dev_err(&dev->dev,
+ if (!psdev || !psdev->pdev) {
+ dev_err(&dev->dev,
"pciback device is not found/assigned\n");
goto end;
}
- if ( !psdev->pdev->sh_info )
- {
+ if (!psdev->pdev->sh_info) {
dev_err(&dev->dev, "pciback device is not connected or owned"
" by HVM, kill it\n");
kill_domain_by_device(psdev);
@@ -768,8 +760,8 @@ static pci_ers_result_t pciback_error_detected(struct pci_dev *dev,
}
/*Guest owns the device yet no aer handler regiested, kill guest*/
- if ( !test_bit(_XEN_PCIB_AERHANDLER,
- (unsigned long *)&psdev->pdev->sh_info->flags) ) {
+ if (!test_bit(_XEN_PCIB_AERHANDLER,
+ (unsigned long *)&psdev->pdev->sh_info->flags)) {
dev_dbg(&dev->dev, "guest may have no aer driver, kill it\n");
kill_domain_by_device(psdev);
goto release;
@@ -778,7 +770,7 @@ static pci_ers_result_t pciback_error_detected(struct pci_dev *dev,
if (result == PCI_ERS_RESULT_NONE ||
result == PCI_ERS_RESULT_DISCONNECT) {
- dev_dbg(&dev->dev,
+ dev_dbg(&dev->dev,
"No AER error_detected service or disconnected!\n");
kill_domain_by_device(psdev);
}
@@ -789,8 +781,8 @@ end:
return result;
}
-/*pciback_error_resume: it will send the error_resume request to pcifront
-* in case of the device driver could provide this service, and then wait
+/*pciback_error_resume: it will send the error_resume request to pcifront
+* in case of the device driver could provide this service, and then wait
* for pcifront ack.
* @dev: pointer to PCI devices
*/
@@ -808,29 +800,28 @@ static void pciback_error_resume(struct pci_dev *dev)
PCI_SLOT(dev->devfn),
PCI_FUNC(dev->devfn));
- if ( !psdev || !psdev->pdev )
- {
- dev_err(&dev->dev,
+ if (!psdev || !psdev->pdev) {
+ dev_err(&dev->dev,
"pciback device is not found/assigned\n");
goto end;
}
- if ( !psdev->pdev->sh_info )
- {
+ if (!psdev->pdev->sh_info) {
dev_err(&dev->dev, "pciback device is not connected or owned"
" by HVM, kill it\n");
kill_domain_by_device(psdev);
goto release;
}
- if ( !test_bit(_XEN_PCIB_AERHANDLER,
- (unsigned long *)&psdev->pdev->sh_info->flags) ) {
- dev_err(&dev->dev,
+ if (!test_bit(_XEN_PCIB_AERHANDLER,
+ (unsigned long *)&psdev->pdev->sh_info->flags)) {
+ dev_err(&dev->dev,
"guest with no AER driver should have been killed\n");
kill_domain_by_device(psdev);
goto release;
}
- common_process(psdev, 1, XEN_PCI_OP_aer_resume, PCI_ERS_RESULT_RECOVERED);
+ common_process(psdev, 1, XEN_PCI_OP_aer_resume,
+ PCI_ERS_RESULT_RECOVERED);
release:
pcistub_device_put(psdev);
end:
@@ -923,8 +914,8 @@ static int pcistub_device_id_remove(int domain, int bus, int slot, int func)
unsigned long flags;
spin_lock_irqsave(&device_ids_lock, flags);
- list_for_each_entry_safe(pci_dev_id, t, &pcistub_device_ids, slot_list) {
-
+ list_for_each_entry_safe(pci_dev_id, t, &pcistub_device_ids,
+ slot_list) {
if (pci_dev_id->domain == domain
&& pci_dev_id->bus == bus && pci_dev_id->devfn == devfn) {
/* Don't break; here because it's possible the same
@@ -976,7 +967,7 @@ static int pcistub_reg_add(int domain, int bus, int slot, int func, int reg,
err = pciback_config_quirks_add_field(dev, field);
if (err)
kfree(field);
- out:
+out:
return err;
}
@@ -992,7 +983,7 @@ static ssize_t pcistub_slot_add(struct device_driver *drv, const char *buf,
err = pcistub_device_id_add(domain, bus, slot, func);
- out:
+out:
if (!err)
err = count;
return err;
@@ -1012,7 +1003,7 @@ static ssize_t pcistub_slot_remove(struct device_driver *drv, const char *buf,
err = pcistub_device_id_remove(domain, bus, slot, func);
- out:
+out:
if (!err)
err = count;
return err;
@@ -1057,7 +1048,7 @@ static ssize_t pcistub_quirk_add(struct device_driver *drv, const char *buf,
err = pcistub_reg_add(domain, bus, slot, func, reg, size, mask);
- out:
+out:
if (!err)
err = count;
return err;
@@ -1067,7 +1058,6 @@ static ssize_t pcistub_quirk_show(struct device_driver *drv, char *buf)
{
int count = 0;
unsigned long flags;
- extern struct list_head pciback_quirks;
struct pciback_config_quirk *quirk;
struct pciback_dev_data *dev_data;
const struct config_field *field;
@@ -1096,12 +1086,13 @@ static ssize_t pcistub_quirk_show(struct device_driver *drv, char *buf)
count += scnprintf(buf + count, PAGE_SIZE - count,
"\t\t%08x:%01x:%08x\n",
- cfg_entry->base_offset + field->offset,
- field->size, field->mask);
+ cfg_entry->base_offset +
+ field->offset, field->size,
+ field->mask);
}
}
- out:
+out:
spin_unlock_irqrestore(&device_ids_lock, flags);
return count;
@@ -1137,14 +1128,14 @@ static ssize_t permissive_add(struct device_driver *drv, const char *buf,
if (!dev_data->permissive) {
dev_data->permissive = 1;
/* Let user know that what they're doing could be unsafe */
- dev_warn(&psdev->dev->dev,
- "enabling permissive mode configuration space accesses!\n");
+ dev_warn(&psdev->dev->dev, "enabling permissive mode "
+ "configuration space accesses!\n");
dev_warn(&psdev->dev->dev,
"permissive mode is potentially unsafe!\n");
}
- release:
+release:
pcistub_device_put(psdev);
- out:
+out:
if (!err)
err = count;
return err;
@@ -1264,10 +1255,10 @@ static int __init pcistub_init(void)
if (err)
pcistub_exit();
- out:
+out:
return err;
- parse_error:
+parse_error:
printk(KERN_ERR "pciback: Error parsing pci_devs_to_hide at \"%s\"\n",
pci_devs_to_hide + pos);
return -EINVAL;
@@ -1276,7 +1267,7 @@ static int __init pcistub_init(void)
#ifndef MODULE
/*
* fs_initcall happens before device_initcall
- * so pciback *should* get called first (b/c we
+ * so pciback *should* get called first (b/c we
* want to suck up any device before other drivers
* get a chance by being the first pci device
* driver to register)
diff --git a/drivers/xen/pciback/pciback.h b/drivers/xen/pciback/pciback.h
index 5e8e14e..98e2912 100644
--- a/drivers/xen/pciback/pciback.h
+++ b/drivers/xen/pciback/pciback.h
@@ -49,6 +49,12 @@ struct pciback_dev_data {
int warned_on_write;
};
+/* Used by XenBus and pciback_ops.c */
+extern wait_queue_head_t aer_wait_queue;
+extern struct workqueue_struct *pciback_wq;
+/* Used by pcistub.c and conf_space_quirks.c */
+extern struct list_head pciback_quirks;
+
/* Get/Put PCI Devices that are hidden from the PCI Backend Domain */
struct pci_dev *pcistub_get_pci_dev_by_slot(struct pciback_device *pdev,
int domain, int bus,
@@ -67,14 +73,14 @@ void pciback_config_free_dyn_fields(struct pci_dev *dev);
void pciback_config_reset_dev(struct pci_dev *dev);
void pciback_config_free_dev(struct pci_dev *dev);
int pciback_config_read(struct pci_dev *dev, int offset, int size,
- u32 * ret_val);
+ u32 *ret_val);
int pciback_config_write(struct pci_dev *dev, int offset, int size, u32 value);
/* Handle requests for specific devices from the frontend */
typedef int (*publish_pci_dev_cb) (struct pciback_device *pdev,
unsigned int domain, unsigned int bus,
unsigned int devfn, unsigned int devid);
-typedef int (*publish_pci_root_cb) (struct pciback_device * pdev,
+typedef int (*publish_pci_root_cb) (struct pciback_device *pdev,
unsigned int domain, unsigned int bus);
int pciback_add_pci_dev(struct pciback_device *pdev, struct pci_dev *dev,
int devid, publish_pci_dev_cb publish_cb);
@@ -83,15 +89,17 @@ struct pci_dev *pciback_get_pci_dev(struct pciback_device *pdev,
unsigned int domain, unsigned int bus,
unsigned int devfn);
-/**
+/**
* Add for domain0 PCIE-AER handling. Get guest domain/bus/devfn in pciback
-* before sending aer request to pcifront, so that guest could identify
+* before sending aer request to pcifront, so that guest could identify
* device, coopearte with pciback to finish aer recovery job if device driver
* has the capability
*/
-int pciback_get_pcifront_dev(struct pci_dev *pcidev, struct pciback_device *pdev,
- unsigned int *domain, unsigned int *bus, unsigned int *devfn);
+int pciback_get_pcifront_dev(struct pci_dev *pcidev,
+ struct pciback_device *pdev,
+ unsigned int *domain, unsigned int *bus,
+ unsigned int *devfn);
int pciback_init_devices(struct pciback_device *pdev);
int pciback_publish_pci_roots(struct pciback_device *pdev,
publish_pci_root_cb cb);
@@ -106,17 +114,17 @@ void pciback_xenbus_unregister(void);
#ifdef CONFIG_PCI_MSI
int pciback_enable_msi(struct pciback_device *pdev,
- struct pci_dev *dev, struct xen_pci_op *op);
+ struct pci_dev *dev, struct xen_pci_op *op);
int pciback_disable_msi(struct pciback_device *pdev,
- struct pci_dev *dev, struct xen_pci_op *op);
+ struct pci_dev *dev, struct xen_pci_op *op);
int pciback_enable_msix(struct pciback_device *pdev,
- struct pci_dev *dev, struct xen_pci_op *op);
+ struct pci_dev *dev, struct xen_pci_op *op);
int pciback_disable_msix(struct pciback_device *pdev,
- struct pci_dev *dev, struct xen_pci_op *op);
+ struct pci_dev *dev, struct xen_pci_op *op);
#endif
extern int verbose_request;
diff --git a/drivers/xen/pciback/pciback_ops.c b/drivers/xen/pciback/pciback_ops.c
index 6624faf..bf83dca 100644
--- a/drivers/xen/pciback/pciback_ops.c
+++ b/drivers/xen/pciback/pciback_ops.c
@@ -5,11 +5,11 @@
*/
#include <linux/module.h>
#include <linux/wait.h>
-#include <asm/bitops.h>
+#include <linux/bitops.h>
#include <xen/events.h>
#include "pciback.h"
-int verbose_request = 0;
+int verbose_request;
module_param(verbose_request, int, 0644);
/* Ensure a device is "turned off" and ready to be exported.
@@ -37,12 +37,10 @@ void pciback_reset_device(struct pci_dev *dev)
}
}
}
-extern wait_queue_head_t aer_wait_queue;
-extern struct workqueue_struct *pciback_wq;
/*
* Now the same evtchn is used for both pcifront conf_read_write request
* as well as pcie aer front end ack. We use a new work_queue to schedule
-* pciback conf_read_write service for avoiding confict with aer_core
+* pciback conf_read_write service for avoiding confict with aer_core
* do_recovery job which also use the system default work_queue
*/
void test_and_schedule_op(struct pciback_device *pdev)
@@ -50,14 +48,13 @@ void test_and_schedule_op(struct pciback_device *pdev)
/* Check that frontend is requesting an operation and that we are not
* already processing a request */
if (test_bit(_XEN_PCIF_active, (unsigned long *)&pdev->sh_info->flags)
- && !test_and_set_bit(_PDEVF_op_active, &pdev->flags))
- {
+ && !test_and_set_bit(_PDEVF_op_active, &pdev->flags)) {
queue_work(pciback_wq, &pdev->op_work);
}
/*_XEN_PCIB_active should have been cleared by pcifront. And also make
sure pciback is waiting for ack by checking _PCIB_op_pending*/
- if (!test_bit(_XEN_PCIB_active,(unsigned long *)&pdev->sh_info->flags)
- &&test_bit(_PCIB_op_pending, &pdev->flags)) {
+ if (!test_bit(_XEN_PCIB_active, (unsigned long *)&pdev->sh_info->flags)
+ && test_bit(_PCIB_op_pending, &pdev->flags)) {
wake_up(&aer_wait_queue);
}
}
@@ -69,7 +66,8 @@ void test_and_schedule_op(struct pciback_device *pdev)
void pciback_do_op(struct work_struct *data)
{
- struct pciback_device *pdev = container_of(data, struct pciback_device, op_work);
+ struct pciback_device *pdev =
+ container_of(data, struct pciback_device, op_work);
struct pci_dev *dev;
struct xen_pci_op *op = &pdev->sh_info->op;
@@ -77,38 +75,36 @@ void pciback_do_op(struct work_struct *data)
if (dev == NULL)
op->err = XEN_PCI_ERR_dev_not_found;
- else
- {
- switch (op->cmd)
- {
- case XEN_PCI_OP_conf_read:
- op->err = pciback_config_read(dev,
- op->offset, op->size, &op->value);
- break;
- case XEN_PCI_OP_conf_write:
- op->err = pciback_config_write(dev,
- op->offset, op->size, op->value);
- break;
+ else {
+ switch (op->cmd) {
+ case XEN_PCI_OP_conf_read:
+ op->err = pciback_config_read(dev,
+ op->offset, op->size, &op->value);
+ break;
+ case XEN_PCI_OP_conf_write:
+ op->err = pciback_config_write(dev,
+ op->offset, op->size, op->value);
+ break;
#ifdef CONFIG_PCI_MSI
- case XEN_PCI_OP_enable_msi:
- op->err = pciback_enable_msi(pdev, dev, op);
- break;
- case XEN_PCI_OP_disable_msi:
- op->err = pciback_disable_msi(pdev, dev, op);
- break;
- case XEN_PCI_OP_enable_msix:
- op->err = pciback_enable_msix(pdev, dev, op);
- break;
- case XEN_PCI_OP_disable_msix:
- op->err = pciback_disable_msix(pdev, dev, op);
- break;
+ case XEN_PCI_OP_enable_msi:
+ op->err = pciback_enable_msi(pdev, dev, op);
+ break;
+ case XEN_PCI_OP_disable_msi:
+ op->err = pciback_disable_msi(pdev, dev, op);
+ break;
+ case XEN_PCI_OP_enable_msix:
+ op->err = pciback_enable_msix(pdev, dev, op);
+ break;
+ case XEN_PCI_OP_disable_msix:
+ op->err = pciback_disable_msix(pdev, dev, op);
+ break;
#endif
- default:
- op->err = XEN_PCI_ERR_not_implemented;
- break;
+ default:
+ op->err = XEN_PCI_ERR_not_implemented;
+ break;
}
}
- /* Tell the driver domain that we're done. */
+ /* Tell the driver domain that we're done. */
wmb();
clear_bit(_XEN_PCIF_active, (unsigned long *)&pdev->sh_info->flags);
notify_remote_via_irq(pdev->evtchn_irq);
@@ -119,7 +115,7 @@ void pciback_do_op(struct work_struct *data)
smp_mb__after_clear_bit(); /* /before/ final check for work */
/* Check to see if the driver domain tried to start another request in
- * between clearing _XEN_PCIF_active and clearing _PDEVF_op_active.
+ * between clearing _XEN_PCIF_active and clearing _PDEVF_op_active.
*/
test_and_schedule_op(pdev);
}
diff --git a/drivers/xen/pciback/slot.c b/drivers/xen/pciback/slot.c
index 105a8b6..efb922d 100644
--- a/drivers/xen/pciback/slot.c
+++ b/drivers/xen/pciback/slot.c
@@ -65,7 +65,8 @@ int pciback_add_pci_dev(struct pciback_device *pdev, struct pci_dev *dev,
for (slot = 0; slot < PCI_SLOT_MAX; slot++) {
if (slot_dev->slots[bus][slot] == NULL) {
printk(KERN_INFO
- "pciback: slot: %s: assign to virtual slot %d, bus %d\n",
+ "pciback: slot: %s: assign to virtual "
+ "slot %d, bus %d\n",
pci_name(dev), slot, bus);
slot_dev->slots[bus][slot] = dev;
goto unlock;
@@ -76,14 +77,14 @@ int pciback_add_pci_dev(struct pciback_device *pdev, struct pci_dev *dev,
xenbus_dev_fatal(pdev->xdev, err,
"No more space on root virtual PCI bus");
- unlock:
+unlock:
spin_unlock_irqrestore(&slot_dev->lock, flags);
/* Publish this device. */
- if(!err)
+ if (!err)
err = publish_cb(pdev, 0, 0, PCI_DEVFN(slot, 0), devid);
- out:
+out:
return err;
}
@@ -105,7 +106,7 @@ void pciback_release_pci_dev(struct pciback_device *pdev, struct pci_dev *dev)
}
}
- out:
+out:
spin_unlock_irqrestore(&slot_dev->lock, flags);
if (found_dev)
@@ -156,8 +157,10 @@ void pciback_release_devices(struct pciback_device *pdev)
pdev->pci_dev_data = NULL;
}
-int pciback_get_pcifront_dev(struct pci_dev *pcidev, struct pciback_device *pdev,
- unsigned int *domain, unsigned int *bus, unsigned int *devfn)
+int pciback_get_pcifront_dev(struct pci_dev *pcidev,
+ struct pciback_device *pdev,
+ unsigned int *domain, unsigned int *bus,
+ unsigned int *devfn)
{
int slot, busnr;
struct slot_dev_data *slot_dev = pdev->pci_dev_data;
@@ -172,11 +175,12 @@ int pciback_get_pcifront_dev(struct pci_dev *pcidev, struct pciback_device *pdev
dev = slot_dev->slots[busnr][slot];
if (dev && dev->bus->number == pcidev->bus->number
&& dev->devfn == pcidev->devfn
- && pci_domain_nr(dev->bus) == pci_domain_nr(pcidev->bus)) {
+ && pci_domain_nr(dev->bus) ==
+ pci_domain_nr(pcidev->bus)) {
found = 1;
*domain = 0;
*bus = busnr;
- *devfn = PCI_DEVFN(slot,0);
+ *devfn = PCI_DEVFN(slot, 0);
goto out;
}
}
diff --git a/drivers/xen/pciback/vpci.c b/drivers/xen/pciback/vpci.c
index a5b7ece..721b81b 100644
--- a/drivers/xen/pciback/vpci.c
+++ b/drivers/xen/pciback/vpci.c
@@ -125,14 +125,14 @@ int pciback_add_pci_dev(struct pciback_device *pdev, struct pci_dev *dev,
xenbus_dev_fatal(pdev->xdev, err,
"No more space on root virtual PCI bus");
- unlock:
+unlock:
spin_unlock_irqrestore(&vpci_dev->lock, flags);
/* Publish this device. */
- if(!err)
+ if (!err)
err = publish_cb(pdev, 0, 0, PCI_DEVFN(slot, func), devid);
- out:
+out:
return err;
}
@@ -158,7 +158,7 @@ void pciback_release_pci_dev(struct pciback_device *pdev, struct pci_dev *dev)
}
}
- out:
+out:
spin_unlock_irqrestore(&vpci_dev->lock, flags);
if (found_dev)
@@ -176,9 +176,8 @@ int pciback_init_devices(struct pciback_device *pdev)
spin_lock_init(&vpci_dev->lock);
- for (slot = 0; slot < PCI_SLOT_MAX; slot++) {
+ for (slot = 0; slot < PCI_SLOT_MAX; slot++)
INIT_LIST_HEAD(&vpci_dev->dev_list[slot]);
- }
pdev->pci_dev_data = vpci_dev;
@@ -211,8 +210,10 @@ void pciback_release_devices(struct pciback_device *pdev)
pdev->pci_dev_data = NULL;
}
-int pciback_get_pcifront_dev(struct pci_dev *pcidev, struct pciback_device *pdev,
- unsigned int *domain, unsigned int *bus, unsigned int *devfn)
+int pciback_get_pcifront_dev(struct pci_dev *pcidev,
+ struct pciback_device *pdev,
+ unsigned int *domain, unsigned int *bus,
+ unsigned int *devfn)
{
struct pci_dev_entry *entry;
struct pci_dev *dev = NULL;
@@ -227,15 +228,16 @@ int pciback_get_pcifront_dev(struct pci_dev *pcidev, struct pciback_device *pdev
list) {
dev = entry->dev;
if (dev && dev->bus->number == pcidev->bus->number
- && pci_domain_nr(dev->bus) == pci_domain_nr(pcidev->bus)
- && dev->devfn == pcidev->devfn)
- {
+ && pci_domain_nr(dev->bus) ==
+ pci_domain_nr(pcidev->bus)
+ && dev->devfn == pcidev->devfn) {
found = 1;
*domain = 0;
*bus = 0;
- *devfn = PCI_DEVFN(slot, PCI_FUNC(pcidev->devfn));
+ *devfn = PCI_DEVFN(slot,
+ PCI_FUNC(pcidev->devfn));
}
- }
+ }
}
spin_unlock_irqrestore(&vpci_dev->lock, flags);
return found;
diff --git a/drivers/xen/pciback/xenbus.c b/drivers/xen/pciback/xenbus.c
index a85c413..efec585 100644
--- a/drivers/xen/pciback/xenbus.c
+++ b/drivers/xen/pciback/xenbus.c
@@ -40,7 +40,7 @@ static struct pciback_device *alloc_pdev(struct xenbus_device *xdev)
kfree(pdev);
pdev = NULL;
}
- out:
+out:
return pdev;
}
@@ -111,7 +111,7 @@ static int pciback_do_attach(struct pciback_device *pdev, int gnt_ref,
err = 0;
dev_dbg(&pdev->xdev->dev, "Attached!\n");
- out:
+out:
return err;
}
@@ -166,11 +166,10 @@ static int pciback_attach(struct pciback_device *pdev)
"Error switching to connected state!");
dev_dbg(&pdev->xdev->dev, "Connected? %d\n", err);
- out:
+out:
spin_unlock(&pdev->dev_lock);
- if (magic)
- kfree(magic);
+ kfree(magic);
return err;
}
@@ -193,7 +192,7 @@ static int pciback_publish_pci_dev(struct pciback_device *pdev,
"%04x:%02x:%02x.%02x", domain, bus,
PCI_SLOT(devfn), PCI_FUNC(devfn));
- out:
+out:
return err;
}
@@ -230,7 +229,7 @@ static int pciback_export_device(struct pciback_device *pdev,
* to other driver domains (as he who controls the bridge can disable
* it and stop the other devices from working).
*/
- out:
+out:
return err;
}
@@ -253,8 +252,8 @@ static int pciback_remove_device(struct pciback_device *pdev,
}
pciback_release_pci_dev(pdev, dev);
-
- out:
+
+out:
return err;
}
@@ -314,7 +313,7 @@ static int pciback_publish_pci_root(struct pciback_device *pdev,
err = xenbus_printf(XBT_NIL, pdev->xdev->nodename,
"root_num", "%d", (root_num + 1));
- out:
+out:
return err;
}
@@ -358,7 +357,7 @@ static int pciback_reconfigure(struct pciback_device *pdev)
}
err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename, state_str,
"%d", &substate);
- if (err != 1)
+ if (err != 1)
substate = XenbusStateUnknown;
switch (substate) {
@@ -389,14 +388,15 @@ static int pciback_reconfigure(struct pciback_device *pdev)
"configuration");
goto out;
}
-
+
err = pciback_export_device(pdev, domain, bus, slot,
func, i);
if (err)
goto out;
/* Publish pci roots. */
- err = pciback_publish_pci_roots(pdev, pciback_publish_pci_root);
+ err = pciback_publish_pci_roots(pdev,
+ pciback_publish_pci_root);
if (err) {
xenbus_dev_fatal(pdev->xdev, err,
"Error while publish PCI root"
@@ -412,7 +412,7 @@ static int pciback_reconfigure(struct pciback_device *pdev)
"Error switching substate of "
"dev-%d\n", i);
goto out;
- }
+ }
break;
case XenbusStateClosing:
@@ -445,7 +445,7 @@ static int pciback_reconfigure(struct pciback_device *pdev)
err = pciback_remove_device(pdev, domain, bus, slot,
func);
- if(err)
+ if (err)
goto out;
/* TODO: If at some point we implement support for pci
@@ -466,8 +466,8 @@ static int pciback_reconfigure(struct pciback_device *pdev)
"Error switching to reconfigured state!");
goto out;
}
-
- out:
+
+out:
spin_unlock(&pdev->dev_lock);
return 0;
@@ -591,7 +591,7 @@ static int pciback_setup_backend(struct pciback_device *pdev)
xenbus_dev_fatal(pdev->xdev, err, "Error switching "
"substate of dev-%d\n", i);
goto out;
- }
+ }
}
err = pciback_publish_pci_roots(pdev, pciback_publish_pci_root);
@@ -607,7 +607,7 @@ static int pciback_setup_backend(struct pciback_device *pdev)
xenbus_dev_fatal(pdev->xdev, err,
"Error switching to initialised state!");
- out:
+out:
spin_unlock(&pdev->dev_lock);
if (!err)
@@ -663,7 +663,7 @@ static int pciback_xenbus_probe(struct xenbus_device *dev,
*/
pciback_be_watch(&pdev->be_watch, NULL, 0);
- out:
+out:
return err;
}
@@ -679,7 +679,7 @@ static int pciback_xenbus_remove(struct xenbus_device *dev)
static const struct xenbus_device_id xenpci_ids[] = {
{"pci"},
- {{0}},
+ {""},
};
static struct xenbus_driver xenbus_pciback_driver = {
--
1.7.4
From ca1ee0c25b425d9739b1a24cf911de2e041a2514 Mon Sep 17 00:00:00 2001
From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Date: Mon, 8 Mar 2010 18:39:15 -0500
Subject: [PATCH 022/197] xen-pciback: remove driver_data direct access to struct device
The driver core will no longer allow direct access to the driver_data
pointer in struct device. Instead, the accessor functions
dev_get_drvdata() and dev_set_drvdata() should be used.
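A minimal sketch of the accessor pattern (the helper below is purely
illustrative):

    static struct pciback_device *pdev_from_xenbus(struct xenbus_device *xdev)
    {
            /* returns whatever was stored earlier with
             * dev_set_drvdata(&xdev->dev, pdev) */
            return dev_get_drvdata(&xdev->dev);
    }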
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
drivers/xen/pciback/xenbus.c | 8 ++++----
1 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/drivers/xen/pciback/xenbus.c b/drivers/xen/pciback/xenbus.c
index efec585..af6c25a 100644
--- a/drivers/xen/pciback/xenbus.c
+++ b/drivers/xen/pciback/xenbus.c
@@ -26,7 +26,7 @@ static struct pciback_device *alloc_pdev(struct xenbus_device *xdev)
dev_dbg(&xdev->dev, "allocated pdev @ 0x%p\n", pdev);
pdev->xdev = xdev;
- xdev->dev.driver_data = pdev;
+ dev_set_drvdata(&xdev->dev, pdev);
spin_lock_init(&pdev->dev_lock);
@@ -75,7 +75,7 @@ static void free_pdev(struct pciback_device *pdev)
pciback_release_devices(pdev);
- pdev->xdev->dev.driver_data = NULL;
+ dev_set_drvdata(&pdev->xdev->dev, NULL);
pdev->xdev = NULL;
kfree(pdev);
@@ -476,7 +476,7 @@ out:
static void pciback_frontend_changed(struct xenbus_device *xdev,
enum xenbus_state fe_state)
{
- struct pciback_device *pdev = xdev->dev.driver_data;
+ struct pciback_device *pdev = dev_get_drvdata(&xdev->dev);
dev_dbg(&xdev->dev, "fe state changed %d\n", fe_state);
@@ -669,7 +669,7 @@ out:
static int pciback_xenbus_remove(struct xenbus_device *dev)
{
- struct pciback_device *pdev = dev->dev.driver_data;
+ struct pciback_device *pdev = dev_get_drvdata(&dev->dev);
if (pdev != NULL)
free_pdev(pdev);
--
1.7.4
From 585f088e6aec3e4514ac2563852961f71c74e47e Mon Sep 17 00:00:00 2001
From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Date: Mon, 8 Mar 2010 18:47:55 -0500
Subject: [PATCH 023/197] xen-pciback: Fix compile error: 'TASK_NORMAL' undeclared.
Both files were missing the #include <linux/sched.h> line.
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
drivers/xen/pciback/pci_stub.c | 1 +
drivers/xen/pciback/pciback_ops.c | 1 +
2 files changed, 2 insertions(+), 0 deletions(-)
diff --git a/drivers/xen/pciback/pci_stub.c b/drivers/xen/pciback/pci_stub.c
index 28222ee..6fc0b6e 100644
--- a/drivers/xen/pciback/pci_stub.c
+++ b/drivers/xen/pciback/pci_stub.c
@@ -12,6 +12,7 @@
#include <linux/kref.h>
#include <linux/pci.h>
#include <linux/wait.h>
+#include <linux/sched.h>
#include <asm/atomic.h>
#include <xen/events.h>
#include <asm/xen/pci.h>
diff --git a/drivers/xen/pciback/pciback_ops.c b/drivers/xen/pciback/pciback_ops.c
index bf83dca..2b9a93e 100644
--- a/drivers/xen/pciback/pciback_ops.c
+++ b/drivers/xen/pciback/pciback_ops.c
@@ -7,6 +7,7 @@
#include <linux/wait.h>
#include <linux/bitops.h>
#include <xen/events.h>
+#include <linux/sched.h>
#include "pciback.h"
int verbose_request;
--
1.7.4
From 03dd111c81bad8e69cdb8b5d67381702adb24593 Mon Sep 17 00:00:00 2001
From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Date: Wed, 9 Dec 2009 17:43:16 -0500
Subject: [PATCH 024/197] xen-pciback: Remove the vestiges of CONFIG_PCI_GUESTDEV.
The functionality this provided (the check that used to be called
pci_is_guestdev) is now available via the "pci=resource_alignment="
command line argument.
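For example, a boot parameter along these lines (the device address is
purely illustrative; see Documentation/kernel-parameters.txt for the
exact syntax) requests page-aligned resources for a device intended to
be handed to a guest:

    pci=resource_alignment=00:05.0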
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
drivers/xen/pciback/pci_stub.c | 10 ----------
1 files changed, 0 insertions(+), 10 deletions(-)
diff --git a/drivers/xen/pciback/pci_stub.c b/drivers/xen/pciback/pci_stub.c
index 6fc0b6e..d30aa7c 100644
--- a/drivers/xen/pciback/pci_stub.c
+++ b/drivers/xen/pciback/pci_stub.c
@@ -430,16 +430,6 @@ static int __devinit pcistub_probe(struct pci_dev *dev,
dev_info(&dev->dev, "seizing device\n");
err = pcistub_seize(dev);
-#ifdef CONFIG_PCI_GUESTDEV
- } else if (dev->hdr_type == PCI_HEADER_TYPE_NORMAL) {
- if (!pci_is_guestdev(dev)) {
- err = -ENODEV;
- goto out;
- }
-
- dev_info(&dev->dev, "seizing device\n");
- err = pcistub_seize(dev);
-#endif /* CONFIG_PCI_GUESTDEV */
} else
/* Didn't find the device */
err = -ENODEV;
--
1.7.4
From 30acb3491495a43b59a64612ad92a7a290c59e82 Mon Sep 17 00:00:00 2001
From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Date: Wed, 9 Dec 2009 17:43:17 -0500
Subject: [PATCH 025/197] xen-pciback: Remove deprecated routine to find domain owner of PCI device.
In the linux-2.6.18.hg tree, the mechanism for finding the domain owner
was for the MSI driver (msi-xen.c) to call into this function to
retrieve the domain number. That mechanism is no longer used.
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
drivers/xen/pciback/pci_stub.c | 19 -------------------
1 files changed, 0 insertions(+), 19 deletions(-)
diff --git a/drivers/xen/pciback/pci_stub.c b/drivers/xen/pciback/pci_stub.c
index d30aa7c..30e7b59 100644
--- a/drivers/xen/pciback/pci_stub.c
+++ b/drivers/xen/pciback/pci_stub.c
@@ -1157,22 +1157,6 @@ static ssize_t permissive_show(struct device_driver *drv, char *buf)
DRIVER_ATTR(permissive, S_IRUSR | S_IWUSR, permissive_show, permissive_add);
-#ifdef CONFIG_PCI_MSI
-
-int pciback_get_owner(struct pci_dev *dev)
-{
- struct pcistub_device *psdev;
-
- psdev = pcistub_device_find(pci_domain_nr(dev->bus), dev->bus->number,
- PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn));
-
- if (!psdev || !psdev->pdev)
- return -1;
-
- return psdev->pdev->xdev->otherend_id;
-}
-#endif
-
static void pcistub_exit(void)
{
driver_remove_file(&pciback_pci_driver.driver, &driver_attr_new_slot);
@@ -1183,7 +1167,6 @@ static void pcistub_exit(void)
driver_remove_file(&pciback_pci_driver.driver, &driver_attr_permissive);
pci_unregister_driver(&pciback_pci_driver);
- WARN_ON(unregister_msi_get_owner(pciback_get_owner));
}
static int __init pcistub_init(void)
@@ -1241,8 +1224,6 @@ static int __init pcistub_init(void)
err = driver_create_file(&pciback_pci_driver.driver,
&driver_attr_permissive);
- if (!err)
- err = register_msi_get_owner(pciback_get_owner);
if (err)
pcistub_exit();
--
1.7.4
From da36c7662d9738ce44c37b4f1f41c045c64d6914 Mon Sep 17 00:00:00 2001
From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Date: Tue, 22 Dec 2009 13:53:41 -0500
Subject: [PATCH 026/197] xen-pciback: Fix compiler warning in pci_stub.c.
warning: the frame size of 1036 bytes is larger than 1024 bytes
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
drivers/xen/pciback/pci_stub.c | 5 +++--
1 files changed, 3 insertions(+), 2 deletions(-)
diff --git a/drivers/xen/pciback/pci_stub.c b/drivers/xen/pciback/pci_stub.c
index 30e7b59..0b5a16b 100644
--- a/drivers/xen/pciback/pci_stub.c
+++ b/drivers/xen/pciback/pci_stub.c
@@ -496,16 +496,17 @@ static const struct pci_device_id pcistub_ids[] = {
{0,},
};
+#define PCI_NODENAME_MAX 40
static void kill_domain_by_device(struct pcistub_device *psdev)
{
struct xenbus_transaction xbt;
int err;
- char nodename[1024];
+ char nodename[PCI_NODENAME_MAX];
if (!psdev)
dev_err(&psdev->dev->dev,
"device is NULL when do AER recovery/kill_domain\n");
- sprintf(nodename, "/local/domain/0/backend/pci/%d/0",
+ snprintf(nodename, PCI_NODENAME_MAX, "/local/domain/0/backend/pci/%d/0",
psdev->pdev->xdev->otherend_id);
nodename[strlen(nodename)] = '\0';
--
1.7.4
From 83484f34b2cc42807c71514fbabbd40e281ec094 Mon Sep 17 00:00:00 2001
From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Date: Tue, 22 Dec 2009 13:53:42 -0500
Subject: [PATCH 027/197] xen-pciback: Fix compile warning in vpci.c
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
warning: func may be used uninitialized in this function
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
drivers/xen/pciback/vpci.c | 2 +-
1 files changed, 1 insertions(+), 1 deletions(-)
diff --git a/drivers/xen/pciback/vpci.c b/drivers/xen/pciback/vpci.c
index 721b81b..2857ab8 100644
--- a/drivers/xen/pciback/vpci.c
+++ b/drivers/xen/pciback/vpci.c
@@ -65,7 +65,7 @@ static inline int match_slot(struct pci_dev *l, struct pci_dev *r)
int pciback_add_pci_dev(struct pciback_device *pdev, struct pci_dev *dev,
int devid, publish_pci_dev_cb publish_cb)
{
- int err = 0, slot, func;
+ int err = 0, slot, func = -1;
struct pci_dev_entry *t, *dev_entry;
struct vpci_dev_data *vpci_dev = pdev->pci_dev_data;
unsigned long flags;
--
1.7.4
From 5612e6358835700c49d8be5671823614ace30c94 Mon Sep 17 00:00:00 2001
From: Ian Campbell <ijc@hellion.org.uk>
Date: Thu, 3 Dec 2009 21:56:20 +0000
Subject: [PATCH 028/197] xen: rename pciback module to xen-pciback.
pciback is a rather generic name for a modular, distro-style kernel.
Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
Cc: Jeremy Fitzhardinge <jeremy@goop.org>
Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
drivers/xen/pciback/Makefile | 24 ++++++++++++------------
1 files changed, 12 insertions(+), 12 deletions(-)
diff --git a/drivers/xen/pciback/Makefile b/drivers/xen/pciback/Makefile
index 106dae7..38bc123 100644
--- a/drivers/xen/pciback/Makefile
+++ b/drivers/xen/pciback/Makefile
@@ -1,16 +1,16 @@
-obj-$(CONFIG_XEN_PCIDEV_BACKEND) += pciback.o
+obj-$(CONFIG_XEN_PCIDEV_BACKEND) += xen-pciback.o
-pciback-y := pci_stub.o pciback_ops.o xenbus.o
-pciback-y += conf_space.o conf_space_header.o \
- conf_space_capability.o \
- conf_space_capability_vpd.o \
- conf_space_capability_pm.o \
- conf_space_quirks.o
-pciback-$(CONFIG_PCI_MSI) += conf_space_capability_msi.o
-pciback-$(CONFIG_XEN_PCIDEV_BACKEND_VPCI) += vpci.o
-pciback-$(CONFIG_XEN_PCIDEV_BACKEND_SLOT) += slot.o
-pciback-$(CONFIG_XEN_PCIDEV_BACKEND_PASS) += passthrough.o
-pciback-$(CONFIG_XEN_PCIDEV_BACKEND_CONTROLLER) += controller.o
+xen-pciback-y := pci_stub.o pciback_ops.o xenbus.o
+xen-pciback-y += conf_space.o conf_space_header.o \
+ conf_space_capability.o \
+ conf_space_capability_vpd.o \
+ conf_space_capability_pm.o \
+ conf_space_quirks.o
+xen-pciback-$(CONFIG_PCI_MSI) += conf_space_capability_msi.o
+xen-pciback-$(CONFIG_XEN_PCIDEV_BACKEND_VPCI) += vpci.o
+xen-pciback-$(CONFIG_XEN_PCIDEV_BACKEND_SLOT) += slot.o
+xen-pciback-$(CONFIG_XEN_PCIDEV_BACKEND_PASS) += passthrough.o
+xen-pciback-$(CONFIG_XEN_PCIDEV_BACKEND_CONTROLLER) += controller.o
ifeq ($(CONFIG_XEN_PCIDEV_BE_DEBUG),y)
EXTRA_CFLAGS += -DDEBUG
--
1.7.4
From 245a9ec5ef1f9c8a6bc6b5c0ac1bb616c3c8c979 Mon Sep 17 00:00:00 2001
From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Date: Wed, 9 Dec 2009 17:43:15 -0500
Subject: [PATCH 029/197] xen-pciback: Register the owner (domain) of the PCI device.
When the front-end and back-end start negotiating, we register the
domain that will use the PCI device. Furthermore, during guest shutdown
or when the PCI device is unbound from pciback (or the module is
unloaded), we unregister the domain owner.
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
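[Illustrative note] A rough user-space model of the ownership pairing this
patch adds: exporting a device registers the connecting domain as its owner,
and release/shutdown unregisters it. The two xen_* names match the helpers
used in the diff below; everything else (the cut-down structures,
export_device(), the single-owner variable) is invented for the sketch.

#include <stdio.h>

struct pci_dev { const char *name; };

static int owner = -1;                  /* -1: no owner registered */

static int xen_register_device_domain_owner(struct pci_dev *dev, int domid)
{
        if (owner != -1)
                return -1;              /* already owned by another domain */
        owner = domid;
        printf("%s now owned by domain %d\n", dev->name, domid);
        return 0;
}

static void xen_unregister_device_domain_owner(struct pci_dev *dev)
{
        printf("%s released by domain %d\n", dev->name, owner);
        owner = -1;
}

/* Export path: frontend/backend negotiation registers the new owner. */
static void export_device(struct pci_dev *dev, int otherend_id)
{
        if (xen_register_device_domain_owner(dev, otherend_id) != 0) {
                /* Mirror the patch: warn, then take ownership anyway. */
                xen_unregister_device_domain_owner(dev);
                xen_register_device_domain_owner(dev, otherend_id);
        }
}

int main(void)
{
        struct pci_dev dev = { "0000:04:00.0" };

        export_device(&dev, 5);                 /* guest domain 5 starts using it */
        xen_unregister_device_domain_owner(&dev);  /* guest shut down */
        return 0;
}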
drivers/xen/pciback/pci_stub.c | 2 ++
drivers/xen/pciback/xenbus.c | 13 +++++++++++++
2 files changed, 15 insertions(+), 0 deletions(-)
diff --git a/drivers/xen/pciback/pci_stub.c b/drivers/xen/pciback/pci_stub.c
index 0b5a16b..02178e2 100644
--- a/drivers/xen/pciback/pci_stub.c
+++ b/drivers/xen/pciback/pci_stub.c
@@ -90,6 +90,8 @@ static void pcistub_device_release(struct kref *kref)
dev_dbg(&psdev->dev->dev, "pcistub_device_release\n");
+ xen_unregister_device_domain_owner(psdev->dev);
+
/* Clean-up the device */
pciback_reset_device(psdev->dev);
pciback_config_free_dyn_fields(psdev->dev);
diff --git a/drivers/xen/pciback/xenbus.c b/drivers/xen/pciback/xenbus.c
index af6c25a..d448bf5 100644
--- a/drivers/xen/pciback/xenbus.c
+++ b/drivers/xen/pciback/xenbus.c
@@ -10,6 +10,7 @@
#include <linux/workqueue.h>
#include <xen/xenbus.h>
#include <xen/events.h>
+#include <asm/xen/pci.h>
#include <linux/workqueue.h>
#include "pciback.h"
@@ -221,6 +222,15 @@ static int pciback_export_device(struct pciback_device *pdev,
if (err)
goto out;
+ dev_dbg(&dev->dev, "registering for %d\n", pdev->xdev->otherend_id);
+ if (xen_register_device_domain_owner(dev,
+ pdev->xdev->otherend_id) != 0) {
+ dev_err(&dev->dev, "device has been assigned to another " \
+ "domain! Over-writting the ownership, but beware.\n");
+ xen_unregister_device_domain_owner(dev);
+ xen_register_device_domain_owner(dev, pdev->xdev->otherend_id);
+ }
+
/* TODO: It'd be nice to export a bridge and have all of its children
* get exported with it. This may be best done in xend (which will
* have to calculate resource usage anyway) but we probably want to
@@ -251,6 +261,9 @@ static int pciback_remove_device(struct pciback_device *pdev,
goto out;
}
+ dev_dbg(&dev->dev, "unregistering for %d\n", pdev->xdev->otherend_id);
+ xen_unregister_device_domain_owner(dev);
+
pciback_release_pci_dev(pdev, dev);
out:
--
1.7.4
From cb6c976606d16119e8608c8bcc1ef9265881dd7f Mon Sep 17 00:00:00 2001
From: Zhao, Yu <yu.zhao@intel.com>
Date: Wed, 3 Mar 2010 13:27:55 -0500
Subject: [PATCH 030/197] xen-pciback: guest SR-IOV support for PV guest
These changes allow a PV guest to use a Virtual Function. The VF's
vendor and device registers in config space read as 0xffff, which is
invalid and would be ignored by the PCI device scan. The values in
'struct pci_dev' are fixed up by the SR-IOV code, so returning them
presents the correct VID and DID to the PV guest kernel.
The command register in config space is read-only and reads as 0, which
means we have to emulate the MMIO enable bit (a VF only uses MMIO
resources) so the PV kernel can work properly.
Acked-by: jbeulich@novell.com
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
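[Illustrative note] In short: VID/DID reads are answered from the values the
SR-IOV core already fixed up in struct pci_dev, and the command-register read
gains the IO/MEMORY decode bits for whichever resources the VF really owns
once it is enabled. A simplified user-space model of those read paths (the
constants mirror their Linux values; the structures are cut down to what the
sketch needs, and the example IDs are arbitrary):

#include <stdio.h>

#define PCI_COMMAND_IO          0x1
#define PCI_COMMAND_MEMORY      0x2
#define IORESOURCE_IO           0x00000100
#define IORESOURCE_MEM          0x00000200
#define PCI_ROM_RESOURCE        6

struct resource { unsigned long flags; };
struct pci_dev {
        unsigned short vendor, device;  /* fixed up by the SR-IOV code */
        int enabled;
        struct resource resource[PCI_ROM_RESOURCE];
};

/* VID/DID reads come from struct pci_dev, not from the all-ones VF
 * config space. */
static unsigned int read_vendor(const struct pci_dev *dev) { return dev->vendor; }
static unsigned int read_device(const struct pci_dev *dev) { return dev->device; }

/* The VF's command register reads as 0, so OR in the decode bits for
 * whatever resources the device really has once it is enabled. */
static unsigned int read_command(const struct pci_dev *dev, unsigned int raw)
{
        int i;

        if (!dev->enabled)
                return raw;
        for (i = 0; i < PCI_ROM_RESOURCE; i++) {
                if (dev->resource[i].flags & IORESOURCE_IO)
                        raw |= PCI_COMMAND_IO;
                if (dev->resource[i].flags & IORESOURCE_MEM)
                        raw |= PCI_COMMAND_MEMORY;
        }
        return raw;
}

int main(void)
{
        struct pci_dev vf = { .vendor = 0x8086, .device = 0x10ca, .enabled = 1 };

        vf.resource[0].flags = IORESOURCE_MEM;  /* VFs only use MMIO */
        printf("vendor %04x device %04x command %04x\n",
               read_vendor(&vf), read_device(&vf), read_command(&vf, 0));
        return 0;
}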
drivers/xen/pciback/conf_space_header.c | 71 ++++++++++++++++++++++++++++--
1 files changed, 66 insertions(+), 5 deletions(-)
diff --git a/drivers/xen/pciback/conf_space_header.c b/drivers/xen/pciback/conf_space_header.c
index 3ae7da1..1f4f86e 100644
--- a/drivers/xen/pciback/conf_space_header.c
+++ b/drivers/xen/pciback/conf_space_header.c
@@ -18,6 +18,25 @@ struct pci_bar_info {
#define is_enable_cmd(value) ((value)&(PCI_COMMAND_MEMORY|PCI_COMMAND_IO))
#define is_master_cmd(value) ((value)&PCI_COMMAND_MASTER)
+static int command_read(struct pci_dev *dev, int offset, u16 *value, void *data)
+{
+ int i;
+ int ret;
+
+ ret = pciback_read_config_word(dev, offset, value, data);
+ if (!atomic_read(&dev->enable_cnt))
+ return ret;
+
+ for (i = 0; i < PCI_ROM_RESOURCE; i++) {
+ if (dev->resource[i].flags & IORESOURCE_IO)
+ *value |= PCI_COMMAND_IO;
+ if (dev->resource[i].flags & IORESOURCE_MEM)
+ *value |= PCI_COMMAND_MEMORY;
+ }
+
+ return ret;
+}
+
static int command_write(struct pci_dev *dev, int offset, u16 value, void *data)
{
int err;
@@ -142,10 +161,26 @@ static inline void read_dev_bar(struct pci_dev *dev,
struct pci_bar_info *bar_info, int offset,
u32 len_mask)
{
- pci_read_config_dword(dev, offset, &bar_info->val);
- pci_write_config_dword(dev, offset, len_mask);
- pci_read_config_dword(dev, offset, &bar_info->len_val);
- pci_write_config_dword(dev, offset, bar_info->val);
+ int pos;
+ struct resource *res = dev->resource;
+
+ if (offset == PCI_ROM_ADDRESS || offset == PCI_ROM_ADDRESS1)
+ pos = PCI_ROM_RESOURCE;
+ else {
+ pos = (offset - PCI_BASE_ADDRESS_0) / 4;
+ if (pos && ((res[pos - 1].flags & (PCI_BASE_ADDRESS_SPACE |
+ PCI_BASE_ADDRESS_MEM_TYPE_MASK)) ==
+ (PCI_BASE_ADDRESS_SPACE_MEMORY |
+ PCI_BASE_ADDRESS_MEM_TYPE_64))) {
+ bar_info->val = res[pos - 1].start >> 32;
+ bar_info->len_val = res[pos - 1].end >> 32;
+ return;
+ }
+ }
+
+ bar_info->val = res[pos].start |
+ (res[pos].flags & PCI_REGION_FLAG_MASK);
+ bar_info->len_val = res[pos].end - res[pos].start + 1;
}
static void *bar_init(struct pci_dev *dev, int offset)
@@ -186,6 +221,22 @@ static void bar_release(struct pci_dev *dev, int offset, void *data)
kfree(data);
}
+static int pciback_read_vendor(struct pci_dev *dev, int offset,
+ u16 *value, void *data)
+{
+ *value = dev->vendor;
+
+ return 0;
+}
+
+static int pciback_read_device(struct pci_dev *dev, int offset,
+ u16 *value, void *data)
+{
+ *value = dev->device;
+
+ return 0;
+}
+
static int interrupt_read(struct pci_dev *dev, int offset, u8 * value,
void *data)
{
@@ -213,9 +264,19 @@ out:
static const struct config_field header_common[] = {
{
+ .offset = PCI_VENDOR_ID,
+ .size = 2,
+ .u.w.read = pciback_read_vendor,
+ },
+ {
+ .offset = PCI_DEVICE_ID,
+ .size = 2,
+ .u.w.read = pciback_read_device,
+ },
+ {
.offset = PCI_COMMAND,
.size = 2,
- .u.w.read = pciback_read_config_word,
+ .u.w.read = command_read,
.u.w.write = command_write,
},
{
--
1.7.4
From 1d77305c7900f3b6ec5d403d9aba6f0034b0112e Mon Sep 17 00:00:00 2001
From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Date: Wed, 3 Mar 2010 13:38:43 -0500
Subject: [PATCH 031/197] xen-pciback: Disable MSI/MSI-X when resetting device
In cases where the guest is abruptly killed and has not disabled
MSI/MSI-X interrupts, we want to do that for it. Otherwise, when the
guest is started up again and enables MSI, we would get a WARN() that
the device had already been enabled.
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
drivers/xen/pciback/pciback_ops.c | 8 ++++++++
1 files changed, 8 insertions(+), 0 deletions(-)
diff --git a/drivers/xen/pciback/pciback_ops.c b/drivers/xen/pciback/pciback_ops.c
index 2b9a93e..011db67 100644
--- a/drivers/xen/pciback/pciback_ops.c
+++ b/drivers/xen/pciback/pciback_ops.c
@@ -23,6 +23,14 @@ void pciback_reset_device(struct pci_dev *dev)
/* Disable devices (but not bridges) */
if (dev->hdr_type == PCI_HEADER_TYPE_NORMAL) {
+#ifdef CONFIG_PCI_MSI
+ /* The guest could have been abruptly killed without
+ * disabling MSI/MSI-X interrupts.*/
+ if (dev->msix_enabled)
+ pci_disable_msix(dev);
+ if (dev->msi_enabled)
+ pci_disable_msi(dev);
+#endif
pci_disable_device(dev);
pci_write_config_word(dev, PCI_COMMAND, 0);
--
1.7.4
From c89edb63b60166fe354493dd465cf5662b2c077d Mon Sep 17 00:00:00 2001
From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Date: Mon, 12 Apr 2010 11:46:00 -0400
Subject: [PATCH 032/197] xen-pciback: Allocate IRQ handler for device that is shared with guest.
If the pciback module is loaded with fake_irq_handler=1, we install an
IRQ handler for all devices that are to be passed to the guest domain.
The IRQ handler returns IRQ_HANDLED or IRQ_NONE depending on the
ack_intr flag.
The trigger for installing this IRQ handler is the enable_isr flag
being set.
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
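[Illustrative note] The handler's decision is deliberately simple and worth
seeing in isolation: it claims the interrupt only while the fake handler is
installed (isr_on) and is still expected to ACK (ack_intr); otherwise it
reports IRQ_NONE so an interrupt truly meant for another owner is not
swallowed. A user-space model of just that logic (IRQ_HANDLED/IRQ_NONE are
mirrored as an enum here, not the kernel definitions):

#include <stdio.h>

enum irqreturn { IRQ_NONE, IRQ_HANDLED };

struct dev_data {
        unsigned int isr_on:1;          /* fake handler installed */
        unsigned int ack_intr:1;        /* ... and expected to ACK */
};

static enum irqreturn guest_interrupt(const struct dev_data *d)
{
        if (d->isr_on && d->ack_intr)
                return IRQ_HANDLED;
        return IRQ_NONE;
}

int main(void)
{
        struct dev_data d = { .isr_on = 1, .ack_intr = 1 };

        printf("legacy INTx: %d\n", guest_interrupt(&d));       /* 1: handled */
        d.ack_intr = 0;                 /* device moved to MSI/MSI-X */
        printf("after MSI:   %d\n", guest_interrupt(&d));       /* 0: none */
        return 0;
}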
drivers/xen/pciback/pci_stub.c | 13 ++++-
drivers/xen/pciback/pciback.h | 12 ++++-
drivers/xen/pciback/pciback_ops.c | 95 ++++++++++++++++++++++++++++++++++++-
3 files changed, 115 insertions(+), 5 deletions(-)
diff --git a/drivers/xen/pciback/pci_stub.c b/drivers/xen/pciback/pci_stub.c
index 02178e2..45bbe99 100644
--- a/drivers/xen/pciback/pci_stub.c
+++ b/drivers/xen/pciback/pci_stub.c
@@ -21,6 +21,8 @@
#include "conf_space.h"
#include "conf_space_quirks.h"
+#define DRV_NAME "pciback"
+
static char *pci_devs_to_hide;
wait_queue_head_t aer_wait_queue;
/*Add sem for sync AER handling and pciback remove/reconfigue ops,
@@ -290,13 +292,20 @@ static int __devinit pcistub_init_device(struct pci_dev *dev)
* would need to be called somewhere to free the memory allocated
* here and then to call kfree(pci_get_drvdata(psdev->dev)).
*/
- dev_data = kzalloc(sizeof(*dev_data), GFP_ATOMIC);
+ dev_data = kzalloc(sizeof(*dev_data) + strlen(DRV_NAME "[]")
+ + strlen(pci_name(dev)) + 1, GFP_ATOMIC);
if (!dev_data) {
err = -ENOMEM;
goto out;
}
pci_set_drvdata(dev, dev_data);
+ /*
+ * Setup name for fake IRQ handler. It will only be enabled
+ * once the device is turned on by the guest.
+ */
+ sprintf(dev_data->irq_name, DRV_NAME "[%s]", pci_name(dev));
+
dev_dbg(&dev->dev, "initializing config\n");
init_waitqueue_head(&aer_wait_queue);
@@ -837,7 +846,7 @@ static struct pci_error_handlers pciback_error_handler = {
*/
static struct pci_driver pciback_pci_driver = {
- .name = "pciback",
+ .name = DRV_NAME,
.id_table = pcistub_ids,
.probe = pcistub_probe,
.remove = pcistub_remove,
diff --git a/drivers/xen/pciback/pciback.h b/drivers/xen/pciback/pciback.h
index 98e2912..9d1b0a6 100644
--- a/drivers/xen/pciback/pciback.h
+++ b/drivers/xen/pciback/pciback.h
@@ -45,8 +45,13 @@ struct pciback_device {
struct pciback_dev_data {
struct list_head config_fields;
- int permissive;
- int warned_on_write;
+ unsigned int permissive : 1;
+ unsigned int warned_on_write : 1;
+ unsigned int enable_intx : 1;
+ unsigned int isr_on : 1; /* Whether the IRQ handler is installed. */
+ unsigned int ack_intr : 1; /* .. and ACK-ing */
+ unsigned int irq; /* Saved in case device transitions to MSI/MSI-X */
+ char irq_name[0]; /* pciback[000:04:00.0] */
};
/* Used by XenBus and pciback_ops.c */
@@ -131,3 +136,6 @@ extern int verbose_request;
void test_and_schedule_op(struct pciback_device *pdev);
#endif
+/* Handles shared IRQs that can go to the device domain and the control domain. */
+void pciback_irq_handler(struct pci_dev *dev, int reset);
+irqreturn_t pciback_guest_interrupt(int irq, void *dev_id);
diff --git a/drivers/xen/pciback/pciback_ops.c b/drivers/xen/pciback/pciback_ops.c
index 011db67..cb54893 100644
--- a/drivers/xen/pciback/pciback_ops.c
+++ b/drivers/xen/pciback/pciback_ops.c
@@ -13,6 +13,78 @@
int verbose_request;
module_param(verbose_request, int, 0644);
+/* Ensure a device has the fake IRQ handler "turned on/off" and is
+ * ready to be exported. This MUST be run after pciback_reset_device
+ * which does the actual PCI device enable/disable.
+ */
+void pciback_control_isr(struct pci_dev *dev, int reset)
+{
+ struct pciback_dev_data *dev_data;
+ int rc;
+ int enable = 0;
+
+ dev_data = pci_get_drvdata(dev);
+ if (!dev_data)
+ return;
+
+ /* We don't deal with bridges */
+ if (dev->hdr_type != PCI_HEADER_TYPE_NORMAL)
+ return;
+
+ if (reset) {
+ dev_data->enable_intx = 0;
+ dev_data->ack_intr = 0;
+ }
+ enable = dev_data->enable_intx;
+
+ /* Asked to disable, but ISR isn't running */
+ if (!enable && !dev_data->isr_on)
+ return;
+
+ /* Squirrel away the IRQs in the dev_data. We need this
+ * b/c when device transitions to MSI, the dev->irq is
+ * overwritten with the MSI vector.
+ */
+ if (enable)
+ dev_data->irq = dev->irq;
+
+ dev_dbg(&dev->dev, "%s: #%d %s %s%s %s-> %s\n",
+ dev_data->irq_name,
+ dev_data->irq,
+ pci_is_enabled(dev) ? "on" : "off",
+ dev->msi_enabled ? "MSI" : "",
+ dev->msix_enabled ? "MSI/X" : "",
+ dev_data->isr_on ? "enable" : "disable",
+ enable ? "enable" : "disable");
+
+ if (enable) {
+ rc = request_irq(dev_data->irq,
+ pciback_guest_interrupt, IRQF_SHARED,
+ dev_data->irq_name, dev);
+ if (rc) {
+ dev_err(&dev->dev, "%s: failed to install fake IRQ " \
+ "handler for IRQ %d! (rc:%d)\n", dev_data->irq_name,
+ dev_data->irq, rc);
+ goto out;
+ }
+ }
+ else {
+ free_irq(dev_data->irq, dev);
+ dev_data->irq = 0;
+ }
+ dev_data->isr_on = enable;
+ dev_data->ack_intr = enable;
+out:
+ dev_dbg(&dev->dev, "%s: #%d %s %s%s %s\n",
+ dev_data->irq_name,
+ dev_data->irq,
+ pci_is_enabled(dev) ? "on" : "off",
+ dev->msi_enabled ? "MSI" : "",
+ dev->msix_enabled ? "MSI/X" : "",
+ enable ? (dev_data->isr_on ? "enabled" : "failed to enable") :
+ (dev_data->isr_on ? "failed to disable" : "disabled"));
+}
+
/* Ensure a device is "turned off" and ready to be exported.
* (Also see pciback_config_reset to ensure virtual configuration space is
* ready to be re-exported)
@@ -21,6 +93,8 @@ void pciback_reset_device(struct pci_dev *dev)
{
u16 cmd;
+ pciback_control_isr(dev, 1 /* reset device */);
+
/* Disable devices (but not bridges) */
if (dev->hdr_type == PCI_HEADER_TYPE_NORMAL) {
#ifdef CONFIG_PCI_MSI
@@ -78,13 +152,18 @@ void pciback_do_op(struct work_struct *data)
struct pciback_device *pdev =
container_of(data, struct pciback_device, op_work);
struct pci_dev *dev;
+ struct pciback_dev_data *dev_data = NULL;
struct xen_pci_op *op = &pdev->sh_info->op;
+ int test_intx = 0;
dev = pciback_get_pci_dev(pdev, op->domain, op->bus, op->devfn);
if (dev == NULL)
op->err = XEN_PCI_ERR_dev_not_found;
else {
+ dev_data = pci_get_drvdata(dev);
+ if (dev_data)
+ test_intx = dev_data->enable_intx;
switch (op->cmd) {
case XEN_PCI_OP_conf_read:
op->err = pciback_config_read(dev,
@@ -109,10 +188,15 @@ void pciback_do_op(struct work_struct *data)
break;
#endif
default:
- op->err = XEN_PCI_ERR_not_implemented;
+ op->err = XEN_PCI_ERR_not_implemented;
break;
}
}
+ if (!op->err && dev && dev_data) {
+ /* Transition detected */
+ if ((dev_data->enable_intx != test_intx))
+ pciback_control_isr(dev, 0 /* no reset */);
+ }
/* Tell the driver domain that we're done. */
wmb();
clear_bit(_XEN_PCIF_active, (unsigned long *)&pdev->sh_info->flags);
@@ -137,3 +221,12 @@ irqreturn_t pciback_handle_event(int irq, void *dev_id)
return IRQ_HANDLED;
}
+irqreturn_t pciback_guest_interrupt(int irq, void *dev_id)
+{
+ struct pci_dev *dev = (struct pci_dev *)dev_id;
+ struct pciback_dev_data *dev_data = pci_get_drvdata(dev);
+
+ if (dev_data->isr_on && dev_data->ack_intr)
+ return IRQ_HANDLED;
+ return IRQ_NONE;
+}
--
1.7.4
From 29a451f41647deedc2fa535520e648c76755568c Mon Sep 17 00:00:00 2001
From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Date: Mon, 12 Apr 2010 11:47:15 -0400
Subject: [PATCH 033/197] xen-pciback: Add SysFS instrumentation for the fake IRQ handler.
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
drivers/xen/pciback/pci_stub.c | 75 +++++++++++++++++++++++++++++++++++++++-
1 files changed, 74 insertions(+), 1 deletions(-)
diff --git a/drivers/xen/pciback/pci_stub.c b/drivers/xen/pciback/pci_stub.c
index 45bbe99..ee2cd68 100644
--- a/drivers/xen/pciback/pci_stub.c
+++ b/drivers/xen/pciback/pci_stub.c
@@ -1038,6 +1038,70 @@ static ssize_t pcistub_slot_show(struct device_driver *drv, char *buf)
DRIVER_ATTR(slots, S_IRUSR, pcistub_slot_show, NULL);
+static ssize_t pcistub_irq_handler_show(struct device_driver *drv, char *buf)
+{
+ struct pcistub_device *psdev;
+ struct pciback_dev_data *dev_data;
+ size_t count = 0;
+ unsigned long flags;
+
+ spin_lock_irqsave(&pcistub_devices_lock, flags);
+ list_for_each_entry(psdev, &pcistub_devices, dev_list) {
+ if (count >= PAGE_SIZE)
+ break;
+ if (!psdev->dev)
+ continue;
+ dev_data = pci_get_drvdata(psdev->dev);
+ if (!dev_data)
+ continue;
+ count +=
+ scnprintf(buf + count, PAGE_SIZE - count, "%s:%s:%sing\n",
+ pci_name(psdev->dev),
+ dev_data->isr_on ? "on" : "off",
+ dev_data->ack_intr ? "ack" : "not ack");
+ }
+ spin_unlock_irqrestore(&pcistub_devices_lock, flags);
+ return count;
+}
+
+DRIVER_ATTR(irq_handlers, S_IRUSR, pcistub_irq_handler_show, NULL);
+
+static ssize_t pcistub_irq_handler_switch(struct device_driver *drv,
+ const char *buf,
+ size_t count)
+{
+ struct pcistub_device *psdev;
+ struct pciback_dev_data *dev_data;
+ int domain, bus, slot, func;
+ int err = -ENOENT;
+
+ err = str_to_slot(buf, &domain, &bus, &slot, &func);
+ if (err)
+ goto out;
+
+ psdev = pcistub_device_find(domain, bus, slot, func);
+
+ if (!psdev)
+ goto out;
+
+ dev_data = pci_get_drvdata(psdev->dev);
+ if (!dev_data)
+ goto out;
+
+ dev_dbg(&psdev->dev->dev, "%s fake irq handler: %d->%d\n",
+ dev_data->irq_name, dev_data->isr_on,
+ !dev_data->isr_on);
+
+ dev_data->isr_on = !(dev_data->isr_on);
+ if (dev_data->isr_on)
+ dev_data->ack_intr = 1;
+out:
+ if (!err)
+ err = count;
+ return err;
+}
+DRIVER_ATTR(irq_handler_state, S_IWUSR, NULL, pcistub_irq_handler_switch);
+
static ssize_t pcistub_quirk_add(struct device_driver *drv, const char *buf,
size_t count)
{
@@ -1177,7 +1241,10 @@ static void pcistub_exit(void)
driver_remove_file(&pciback_pci_driver.driver, &driver_attr_slots);
driver_remove_file(&pciback_pci_driver.driver, &driver_attr_quirks);
driver_remove_file(&pciback_pci_driver.driver, &driver_attr_permissive);
-
+ driver_remove_file(&pciback_pci_driver.driver,
+ &driver_attr_irq_handlers);
+ driver_remove_file(&pciback_pci_driver.driver,
+ &driver_attr_irq_handler_state);
pci_unregister_driver(&pciback_pci_driver);
}
@@ -1236,6 +1303,12 @@ static int __init pcistub_init(void)
err = driver_create_file(&pciback_pci_driver.driver,
&driver_attr_permissive);
+ if (!err)
+ err = driver_create_file(&pciback_pci_driver.driver,
+ &driver_attr_irq_handlers);
+ if (!err)
+ err = driver_create_file(&pciback_pci_driver.driver,
+ &driver_attr_irq_handler_state);
if (err)
pcistub_exit();
--
1.7.4
From 6c7c36d411eeab67192fe0ed96ac1e048b4a1755 Mon Sep 17 00:00:00 2001
From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Date: Mon, 19 Apr 2010 14:39:10 -0400
Subject: [PATCH 034/197] xen-pciback: When device transitions to MSI/MSI-X stop ACK-ing on the
legacy interrupt.
But don't remove the IRQ handler from the legacy interrupt. The device
might still transition back to legacy interrupts.
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
drivers/xen/pciback/conf_space_capability_msi.c | 17 ++++++++++++++++-
1 files changed, 16 insertions(+), 1 deletions(-)
diff --git a/drivers/xen/pciback/conf_space_capability_msi.c b/drivers/xen/pciback/conf_space_capability_msi.c
index b70ea8b..a236e2d 100644
--- a/drivers/xen/pciback/conf_space_capability_msi.c
+++ b/drivers/xen/pciback/conf_space_capability_msi.c
@@ -12,6 +12,7 @@
int pciback_enable_msi(struct pciback_device *pdev,
struct pci_dev *dev, struct xen_pci_op *op)
{
+ struct pciback_dev_data *dev_data;
int otherend = pdev->xdev->otherend_id;
int status;
@@ -27,21 +28,29 @@ int pciback_enable_msi(struct pciback_device *pdev,
/* The value the guest needs is actually the IDT vector, not the
* the local domain's IRQ number. */
op->value = xen_gsi_from_irq(dev->irq);
+ dev_data = pci_get_drvdata(dev);
+ if (dev_data)
+ dev_data->ack_intr = 0;
return 0;
}
int pciback_disable_msi(struct pciback_device *pdev,
struct pci_dev *dev, struct xen_pci_op *op)
{
+ struct pciback_dev_data *dev_data;
pci_disable_msi(dev);
op->value = xen_gsi_from_irq(dev->irq);
+ dev_data = pci_get_drvdata(dev);
+ if (dev_data)
+ dev_data->ack_intr = 1;
return 0;
}
int pciback_enable_msix(struct pciback_device *pdev,
struct pci_dev *dev, struct xen_pci_op *op)
{
+ struct pciback_dev_data *dev_data;
int i, result;
struct msix_entry *entries;
@@ -68,6 +77,9 @@ int pciback_enable_msix(struct pciback_device *pdev,
kfree(entries);
op->value = result;
+ dev_data = pci_get_drvdata(dev);
+ if (dev_data)
+ dev_data->ack_intr = 0;
return result;
}
@@ -75,10 +87,13 @@ int pciback_enable_msix(struct pciback_device *pdev,
int pciback_disable_msix(struct pciback_device *pdev,
struct pci_dev *dev, struct xen_pci_op *op)
{
-
+ struct pciback_dev_data *dev_data;
pci_disable_msix(dev);
op->value = xen_gsi_from_irq(dev->irq);
+ dev_data = pci_get_drvdata(dev);
+ if (dev_data)
+ dev_data->ack_intr = 1;
return 0;
}
--
1.7.4
From c1cc36c68f096f2b1e796ba84d9c583009939d91 Mon Sep 17 00:00:00 2001
From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Date: Mon, 19 Apr 2010 14:40:38 -0400
Subject: [PATCH 035/197] xen-pciback: Enable interrupt handler when device is enabled.
And also request that it be disabled when the device has been
disabled.
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
drivers/xen/pciback/conf_space_header.c | 6 ++++++
1 files changed, 6 insertions(+), 0 deletions(-)
diff --git a/drivers/xen/pciback/conf_space_header.c b/drivers/xen/pciback/conf_space_header.c
index 1f4f86e..cb450f4 100644
--- a/drivers/xen/pciback/conf_space_header.c
+++ b/drivers/xen/pciback/conf_space_header.c
@@ -39,8 +39,10 @@ static int command_read(struct pci_dev *dev, int offset, u16 *value, void *data)
static int command_write(struct pci_dev *dev, int offset, u16 value, void *data)
{
+ struct pciback_dev_data *dev_data;
int err;
+ dev_data = pci_get_drvdata(dev);
if (!pci_is_enabled(dev) && is_enable_cmd(value)) {
if (unlikely(verbose_request))
printk(KERN_DEBUG "pciback: %s: enable\n",
@@ -48,11 +50,15 @@ static int command_write(struct pci_dev *dev, int offset, u16 value, void *data)
err = pci_enable_device(dev);
if (err)
return err;
+ if (dev_data)
+ dev_data->enable_intx = 1;
} else if (pci_is_enabled(dev) && !is_enable_cmd(value)) {
if (unlikely(verbose_request))
printk(KERN_DEBUG "pciback: %s: disable\n",
pci_name(dev));
pci_disable_device(dev);
+ if (dev_data)
+ dev_data->enable_intx = 0;
}
if (!dev->is_busmaster && is_master_cmd(value)) {
--
1.7.4
From a732e3d6ed4831c460586bd7a16ef7f6b7d28936 Mon Sep 17 00:00:00 2001
From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Date: Mon, 19 Apr 2010 16:23:06 -0400
Subject: [PATCH 036/197] xen-pciback: Probe the IRQ line to check if it is not shared.
If it is not shared, we stop ACK-ing the IRQ line as there is
no need for this irq handler to return IRQ_HANDLED.
We have to do this check much later than when pciback and pcifront
start talking, because the hypercall by which a guest notifies the
other guest that the IRQ line is shared is made asynchronously.
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
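[Illustrative note] Because the notification that a line is shared arrives
asynchronously, the handler cannot decide at setup time; instead it re-checks
every 1000 handled interrupts and stops ACK-ing once the line turns out to be
exclusive. A self-contained model of that throttling, with line_not_shared()
merely standing in for xen_ignore_irq():

#include <stdio.h>
#include <stdbool.h>

/* Stand-in for xen_ignore_irq(): returns true once the line turns out
 * not to be shared with any other domain. */
static bool line_not_shared(int irq)
{
        return irq == 16;       /* example: IRQ 16 turns out to be exclusive */
}

struct dev_data {
        unsigned int ack_intr:1;
        unsigned long handled;
};

static void guest_interrupt(struct dev_data *d, int irq)
{
        if (!d->ack_intr)
                return;
        d->handled++;
        /* Only ask every 1000 interrupts; the answer can change late
         * because the frontend's hypercall is asynchronous. */
        if ((d->handled % 1000) == 0 && line_not_shared(irq)) {
                printf("IRQ%d not shared, stop ACK-ing after %lu interrupts\n",
                       irq, d->handled);
                d->ack_intr = 0;
        }
}

int main(void)
{
        struct dev_data d = { .ack_intr = 1 };
        int i;

        for (i = 0; i < 2500; i++)
                guest_interrupt(&d, 16);
        printf("ack_intr is now %d\n", d.ack_intr);
        return 0;
}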
drivers/xen/pciback/pci_stub.c | 5 +++--
drivers/xen/pciback/pciback.h | 1 +
drivers/xen/pciback/pciback_ops.c | 12 +++++++++++-
3 files changed, 15 insertions(+), 3 deletions(-)
diff --git a/drivers/xen/pciback/pci_stub.c b/drivers/xen/pciback/pci_stub.c
index ee2cd68..88c7ca1 100644
--- a/drivers/xen/pciback/pci_stub.c
+++ b/drivers/xen/pciback/pci_stub.c
@@ -1055,10 +1055,11 @@ static ssize_t pcistub_irq_handler_show(struct device_driver *drv, char *buf)
if (!dev_data)
continue;
count +=
- scnprintf(buf + count, PAGE_SIZE - count, "%s:%s:%sing\n",
+ scnprintf(buf + count, PAGE_SIZE - count, "%s:%s:%sing:%ld\n",
pci_name(psdev->dev),
dev_data->isr_on ? "on" : "off",
- dev_data->ack_intr ? "ack" : "not ack");
+ dev_data->ack_intr ? "ack" : "not ack",
+ dev_data->handled);
}
spin_unlock_irqrestore(&pcistub_devices_lock, flags);
return count;
diff --git a/drivers/xen/pciback/pciback.h b/drivers/xen/pciback/pciback.h
index 9d1b0a6..fc31052 100644
--- a/drivers/xen/pciback/pciback.h
+++ b/drivers/xen/pciback/pciback.h
@@ -50,6 +50,7 @@ struct pciback_dev_data {
unsigned int enable_intx : 1;
unsigned int isr_on : 1; /* Whether the IRQ handler is installed. */
unsigned int ack_intr : 1; /* .. and ACK-ing */
+ unsigned long handled;
unsigned int irq; /* Saved in case device transitions to MSI/MSI-X */
char irq_name[0]; /* pciback[000:04:00.0] */
};
diff --git a/drivers/xen/pciback/pciback_ops.c b/drivers/xen/pciback/pciback_ops.c
index cb54893..5543881 100644
--- a/drivers/xen/pciback/pciback_ops.c
+++ b/drivers/xen/pciback/pciback_ops.c
@@ -226,7 +226,17 @@ irqreturn_t pciback_guest_interrupt(int irq, void *dev_id)
struct pci_dev *dev = (struct pci_dev *)dev_id;
struct pciback_dev_data *dev_data = pci_get_drvdata(dev);
- if (dev_data->isr_on && dev_data->ack_intr)
+ if (dev_data->isr_on && dev_data->ack_intr) {
+ dev_data->handled++;
+ if ((dev_data->handled % 1000) == 0) {
+ if (xen_ignore_irq(irq)) {
+ printk(KERN_INFO "%s IRQ line is not shared "
+ "with other domains. Turning ISR off\n",
+ dev_data->irq_name);
+ dev_data->ack_intr = 0;
+ }
+ }
return IRQ_HANDLED;
+ }
return IRQ_NONE;
}
--
1.7.4
From 3312c11c3f9c857b2457c293e6b6e15928a32f32 Mon Sep 17 00:00:00 2001
From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Date: Tue, 20 Apr 2010 20:22:40 -0400
Subject: [PATCH 037/197] xen-pciback: Add debug statements for the MSI/MSI-X configuration module.
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
drivers/xen/pciback/conf_space_capability_msi.c | 11 +++++++++++
1 files changed, 11 insertions(+), 0 deletions(-)
diff --git a/drivers/xen/pciback/conf_space_capability_msi.c b/drivers/xen/pciback/conf_space_capability_msi.c
index a236e2d..b15131e 100644
--- a/drivers/xen/pciback/conf_space_capability_msi.c
+++ b/drivers/xen/pciback/conf_space_capability_msi.c
@@ -16,6 +16,9 @@ int pciback_enable_msi(struct pciback_device *pdev,
int otherend = pdev->xdev->otherend_id;
int status;
+ if (unlikely(verbose_request))
+ printk(KERN_DEBUG "pciback: %s: enable MSI\n", pci_name(dev));
+
status = pci_enable_msi(dev);
if (status) {
@@ -31,6 +34,7 @@ int pciback_enable_msi(struct pciback_device *pdev,
dev_data = pci_get_drvdata(dev);
if (dev_data)
dev_data->ack_intr = 0;
+
return 0;
}
@@ -38,6 +42,9 @@ int pciback_disable_msi(struct pciback_device *pdev,
struct pci_dev *dev, struct xen_pci_op *op)
{
struct pciback_dev_data *dev_data;
+
+ if (unlikely(verbose_request))
+ printk(KERN_DEBUG "pciback: %s: disable MSI\n", pci_name(dev));
pci_disable_msi(dev);
op->value = xen_gsi_from_irq(dev->irq);
@@ -54,6 +61,8 @@ int pciback_enable_msix(struct pciback_device *pdev,
int i, result;
struct msix_entry *entries;
+ if (unlikely(verbose_request))
+ printk(KERN_DEBUG "pciback: %s: enable MSI-X\n", pci_name(dev));
if (op->value > SH_INFO_MAX_VEC)
return -EINVAL;
@@ -88,6 +97,8 @@ int pciback_disable_msix(struct pciback_device *pdev,
struct pci_dev *dev, struct xen_pci_op *op)
{
struct pciback_dev_data *dev_data;
+ if (unlikely(verbose_request))
+ printk(KERN_DEBUG "pciback: %s: disable MSI-X\n", pci_name(dev));
pci_disable_msix(dev);
op->value = xen_gsi_from_irq(dev->irq);
--
1.7.4
From 52257d7ad18bd91fd614df5ef960a88af3ed5200 Mon Sep 17 00:00:00 2001
From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Date: Fri, 23 Jul 2010 14:35:47 -0400
Subject: [PATCH 038/197] xen-pciback: Redo spinlock usage.
We were using coarse spinlocks that could end up with a deadlock.
This patch fixes that and makes the spinlocks much more fine-grained.
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
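[Illustrative note] The shape of the fix, rather than the code itself: hold
dev_lock only around the individual state updates and never across the
workqueue flush, which can block while a queued op runs. A rough pthread
sketch of the reworked disconnect path (the names and the fake sh_info are
illustrative; the kernel code uses a spinlock and flush_workqueue()):

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t dev_lock = PTHREAD_MUTEX_INITIALIZER;
static void *sh_info = (void *)0x1;     /* pretend shared ring mapping */

static void flush_outstanding_ops(void)
{
        /* In the kernel this is flush_workqueue(pciback_wq); it may
         * block, so it must not run under dev_lock. */
}

static void disconnect(void)
{
        /* Take the lock only for the state updates themselves ... */
        pthread_mutex_lock(&dev_lock);
        /* tear down the event-channel IRQ here */
        pthread_mutex_unlock(&dev_lock);

        /* ... not across the potentially blocking flush ... */
        flush_outstanding_ops();

        /* ... and reacquire it for the final teardown. */
        pthread_mutex_lock(&dev_lock);
        if (sh_info) {
                /* unmap the shared page */
                sh_info = NULL;
        }
        pthread_mutex_unlock(&dev_lock);
}

int main(void)
{
        disconnect();
        printf("sh_info cleared: %s\n", sh_info ? "no" : "yes");
        return 0;
}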
drivers/xen/pciback/xenbus.c | 34 +++++++++++++++++++++-------------
1 files changed, 21 insertions(+), 13 deletions(-)
diff --git a/drivers/xen/pciback/xenbus.c b/drivers/xen/pciback/xenbus.c
index d448bf5..f0d5426 100644
--- a/drivers/xen/pciback/xenbus.c
+++ b/drivers/xen/pciback/xenbus.c
@@ -54,23 +54,31 @@ static void pciback_disconnect(struct pciback_device *pdev)
unbind_from_irqhandler(pdev->evtchn_irq, pdev);
pdev->evtchn_irq = INVALID_EVTCHN_IRQ;
}
+ spin_unlock(&pdev->dev_lock);
/* If the driver domain started an op, make sure we complete it
* before releasing the shared memory */
+
+ /* Note, the workqueue does not use spinlocks at all.*/
flush_workqueue(pciback_wq);
+ spin_lock(&pdev->dev_lock);
if (pdev->sh_info != NULL) {
xenbus_unmap_ring_vfree(pdev->xdev, pdev->sh_info);
pdev->sh_info = NULL;
}
-
spin_unlock(&pdev->dev_lock);
+
}
static void free_pdev(struct pciback_device *pdev)
{
- if (pdev->be_watching)
+ spin_lock(&pdev->dev_lock);
+ if (pdev->be_watching) {
unregister_xenbus_watch(&pdev->be_watch);
+ pdev->be_watching = 0;
+ }
+ spin_unlock(&pdev->dev_lock);
pciback_disconnect(pdev);
@@ -98,7 +106,10 @@ static int pciback_do_attach(struct pciback_device *pdev, int gnt_ref,
"Error mapping other domain page in ours.");
goto out;
}
+
+ spin_lock(&pdev->dev_lock);
pdev->sh_info = vaddr;
+ spin_unlock(&pdev->dev_lock);
err = bind_interdomain_evtchn_to_irqhandler(
pdev->xdev->otherend_id, remote_evtchn, pciback_handle_event,
@@ -108,7 +119,10 @@ static int pciback_do_attach(struct pciback_device *pdev, int gnt_ref,
"Error binding event channel to IRQ");
goto out;
}
+
+ spin_lock(&pdev->dev_lock);
pdev->evtchn_irq = err;
+ spin_unlock(&pdev->dev_lock);
err = 0;
dev_dbg(&pdev->xdev->dev, "Attached!\n");
@@ -122,7 +136,6 @@ static int pciback_attach(struct pciback_device *pdev)
int gnt_ref, remote_evtchn;
char *magic = NULL;
- spin_lock(&pdev->dev_lock);
/* Make sure we only do this setup once */
if (xenbus_read_driver_state(pdev->xdev->nodename) !=
@@ -168,7 +181,6 @@ static int pciback_attach(struct pciback_device *pdev)
dev_dbg(&pdev->xdev->dev, "Connected? %d\n", err);
out:
- spin_unlock(&pdev->dev_lock);
kfree(magic);
@@ -340,7 +352,6 @@ static int pciback_reconfigure(struct pciback_device *pdev)
char state_str[64];
char dev_str[64];
- spin_lock(&pdev->dev_lock);
dev_dbg(&pdev->xdev->dev, "Reconfiguring device ...\n");
@@ -481,8 +492,6 @@ static int pciback_reconfigure(struct pciback_device *pdev)
}
out:
- spin_unlock(&pdev->dev_lock);
-
return 0;
}
@@ -539,8 +548,6 @@ static int pciback_setup_backend(struct pciback_device *pdev)
char dev_str[64];
char state_str[64];
- spin_lock(&pdev->dev_lock);
-
/* It's possible we could get the call to setup twice, so make sure
* we're not already connected.
*/
@@ -621,8 +628,6 @@ static int pciback_setup_backend(struct pciback_device *pdev)
"Error switching to initialised state!");
out:
- spin_unlock(&pdev->dev_lock);
-
if (!err)
/* see if pcifront is already configured (if not, we'll wait) */
pciback_attach(pdev);
@@ -669,7 +674,10 @@ static int pciback_xenbus_probe(struct xenbus_device *dev,
pciback_be_watch);
if (err)
goto out;
+
+ spin_lock(&pdev->dev_lock);
pdev->be_watching = 1;
+ spin_unlock(&pdev->dev_lock);
/* We need to force a call to our callback here in case
* xend already configured us!
@@ -708,8 +716,8 @@ int __init pciback_xenbus_register(void)
{
pciback_wq = create_workqueue("pciback_workqueue");
if (!pciback_wq) {
- printk(KERN_ERR "pciback_xenbus_register: create"
- "pciback_workqueue failed\n");
+ printk(KERN_ERR "%s: create"
+ "pciback_workqueue failed\n",__FUNCTION__);
return -EFAULT;
}
return xenbus_register_backend(&xenbus_pciback_driver);
--
1.7.4
From a9e0cfab0577730e74787b701edc727756a52b11 Mon Sep 17 00:00:00 2001
From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Date: Wed, 28 Jul 2010 13:28:34 -0400
Subject: [PATCH 039/197] xen-pciback: Remove spinlock for be->watching state.
There is no need to guard this with a spinlock. It is already
serialized by the xenwatch thread, so there are no concurrent users.
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
drivers/xen/pciback/xenbus.c | 4 ----
1 files changed, 0 insertions(+), 4 deletions(-)
diff --git a/drivers/xen/pciback/xenbus.c b/drivers/xen/pciback/xenbus.c
index f0d5426..993b659 100644
--- a/drivers/xen/pciback/xenbus.c
+++ b/drivers/xen/pciback/xenbus.c
@@ -73,12 +73,10 @@ static void pciback_disconnect(struct pciback_device *pdev)
static void free_pdev(struct pciback_device *pdev)
{
- spin_lock(&pdev->dev_lock);
if (pdev->be_watching) {
unregister_xenbus_watch(&pdev->be_watch);
pdev->be_watching = 0;
}
- spin_unlock(&pdev->dev_lock);
pciback_disconnect(pdev);
@@ -675,9 +673,7 @@ static int pciback_xenbus_probe(struct xenbus_device *dev,
if (err)
goto out;
- spin_lock(&pdev->dev_lock);
pdev->be_watching = 1;
- spin_unlock(&pdev->dev_lock);
/* We need to force a call to our callback here in case
* xend already configured us!
--
1.7.4
From c0cae0b36c43e75d4d69c60f5319e6ba802b2233 Mon Sep 17 00:00:00 2001
From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Date: Mon, 13 Dec 2010 11:06:36 -0500
Subject: [PATCH 040/197] xen/pciback: Fix checkpatch warnings and errors.
Checkpatch found some extra warnings and errors. This mega
patch fixes all of them in one swoop.
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
arch/x86/include/asm/xen/pci.h | 2 +-
drivers/xen/events.c | 38 ++++++++++----------
drivers/xen/pciback/conf_space.c | 4 +-
drivers/xen/pciback/conf_space_capability_msi.c | 11 +++---
drivers/xen/pciback/conf_space_header.c | 42 +++++++++++-----------
drivers/xen/pciback/controller.c | 2 +-
drivers/xen/pciback/pci_stub.c | 7 ++--
drivers/xen/pciback/pciback.h | 16 ++++----
drivers/xen/pciback/pciback_ops.c | 9 ++---
drivers/xen/pciback/xenbus.c | 14 ++++----
10 files changed, 73 insertions(+), 72 deletions(-)
diff --git a/arch/x86/include/asm/xen/pci.h b/arch/x86/include/asm/xen/pci.h
index 8474b4b..7e61d78 100644
--- a/arch/x86/include/asm/xen/pci.h
+++ b/arch/x86/include/asm/xen/pci.h
@@ -27,7 +27,7 @@ static inline int xen_find_device_domain_owner(struct pci_dev *dev)
return -1;
}
static inline int xen_register_device_domain_owner(struct pci_dev *dev,
- uint16_t domain)
+ uint16_t domain)
{
return -1;
}
diff --git a/drivers/xen/events.c b/drivers/xen/events.c
index 95eea13..3929c20 100644
--- a/drivers/xen/events.c
+++ b/drivers/xen/events.c
@@ -698,7 +698,7 @@ int xen_create_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int type)
domid = rc = xen_find_device_domain_owner(dev);
if (rc < 0)
domid = DOMID_SELF;
-
+
memset(&map_irq, 0, sizeof(map_irq));
map_irq.domid = domid;
map_irq.type = MAP_PIRQ_TYPE_MSI;
@@ -850,18 +850,18 @@ static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu)
}
static int bind_interdomain_evtchn_to_irq(unsigned int remote_domain,
- unsigned int remote_port)
+ unsigned int remote_port)
{
- struct evtchn_bind_interdomain bind_interdomain;
- int err;
+ struct evtchn_bind_interdomain bind_interdomain;
+ int err;
- bind_interdomain.remote_dom = remote_domain;
- bind_interdomain.remote_port = remote_port;
+ bind_interdomain.remote_dom = remote_domain;
+ bind_interdomain.remote_port = remote_port;
- err = HYPERVISOR_event_channel_op(EVTCHNOP_bind_interdomain,
- &bind_interdomain);
+ err = HYPERVISOR_event_channel_op(EVTCHNOP_bind_interdomain,
+ &bind_interdomain);
- return err ? : bind_evtchn_to_irq(bind_interdomain.local_port);
+ return err ? : bind_evtchn_to_irq(bind_interdomain.local_port);
}
@@ -966,19 +966,19 @@ int bind_interdomain_evtchn_to_irqhandler(unsigned int remote_domain,
const char *devname,
void *dev_id)
{
- int irq, retval;
+ int irq, retval;
- irq = bind_interdomain_evtchn_to_irq(remote_domain, remote_port);
- if (irq < 0)
- return irq;
+ irq = bind_interdomain_evtchn_to_irq(remote_domain, remote_port);
+ if (irq < 0)
+ return irq;
- retval = request_irq(irq, handler, irqflags, devname, dev_id);
- if (retval != 0) {
- unbind_from_irq(irq);
- return retval;
- }
+ retval = request_irq(irq, handler, irqflags, devname, dev_id);
+ if (retval != 0) {
+ unbind_from_irq(irq);
+ return retval;
+ }
- return irq;
+ return irq;
}
EXPORT_SYMBOL_GPL(bind_interdomain_evtchn_to_irqhandler);
diff --git a/drivers/xen/pciback/conf_space.c b/drivers/xen/pciback/conf_space.c
index 370c18e..eb6bba0 100644
--- a/drivers/xen/pciback/conf_space.c
+++ b/drivers/xen/pciback/conf_space.c
@@ -18,8 +18,8 @@
static int permissive;
module_param(permissive, bool, 0644);
-#define DEFINE_PCI_CONFIG(op, size, type) \
-int pciback_##op##_config_##size \
+#define DEFINE_PCI_CONFIG(op, size, type) \
+int pciback_##op##_config_##size \
(struct pci_dev *dev, int offset, type value, void *data) \
{ \
return pci_##op##_config_##size(dev, offset, value); \
diff --git a/drivers/xen/pciback/conf_space_capability_msi.c b/drivers/xen/pciback/conf_space_capability_msi.c
index b15131e..3acda69 100644
--- a/drivers/xen/pciback/conf_space_capability_msi.c
+++ b/drivers/xen/pciback/conf_space_capability_msi.c
@@ -16,7 +16,7 @@ int pciback_enable_msi(struct pciback_device *pdev,
int otherend = pdev->xdev->otherend_id;
int status;
- if (unlikely(verbose_request))
+ if (unlikely(verbose_request))
printk(KERN_DEBUG "pciback: %s: enable MSI\n", pci_name(dev));
status = pci_enable_msi(dev);
@@ -43,7 +43,7 @@ int pciback_disable_msi(struct pciback_device *pdev,
{
struct pciback_dev_data *dev_data;
- if (unlikely(verbose_request))
+ if (unlikely(verbose_request))
printk(KERN_DEBUG "pciback: %s: disable MSI\n", pci_name(dev));
pci_disable_msi(dev);
@@ -61,7 +61,7 @@ int pciback_enable_msix(struct pciback_device *pdev,
int i, result;
struct msix_entry *entries;
- if (unlikely(verbose_request))
+ if (unlikely(verbose_request))
printk(KERN_DEBUG "pciback: %s: enable MSI-X\n", pci_name(dev));
if (op->value > SH_INFO_MAX_VEC)
return -EINVAL;
@@ -97,8 +97,9 @@ int pciback_disable_msix(struct pciback_device *pdev,
struct pci_dev *dev, struct xen_pci_op *op)
{
struct pciback_dev_data *dev_data;
- if (unlikely(verbose_request))
- printk(KERN_DEBUG "pciback: %s: disable MSI-X\n", pci_name(dev));
+ if (unlikely(verbose_request))
+ printk(KERN_DEBUG "pciback: %s: disable MSI-X\n",
+ pci_name(dev));
pci_disable_msix(dev);
op->value = xen_gsi_from_irq(dev->irq);
diff --git a/drivers/xen/pciback/conf_space_header.c b/drivers/xen/pciback/conf_space_header.c
index cb450f4..22ad0f5 100644
--- a/drivers/xen/pciback/conf_space_header.c
+++ b/drivers/xen/pciback/conf_space_header.c
@@ -316,27 +316,27 @@ static const struct config_field header_common[] = {
{}
};
-#define CFG_FIELD_BAR(reg_offset) \
- { \
- .offset = reg_offset, \
- .size = 4, \
- .init = bar_init, \
- .reset = bar_reset, \
- .release = bar_release, \
- .u.dw.read = bar_read, \
- .u.dw.write = bar_write, \
- }
-
-#define CFG_FIELD_ROM(reg_offset) \
- { \
- .offset = reg_offset, \
- .size = 4, \
- .init = rom_init, \
- .reset = bar_reset, \
- .release = bar_release, \
- .u.dw.read = bar_read, \
- .u.dw.write = rom_write, \
- }
+#define CFG_FIELD_BAR(reg_offset) \
+ { \
+ .offset = reg_offset, \
+ .size = 4, \
+ .init = bar_init, \
+ .reset = bar_reset, \
+ .release = bar_release, \
+ .u.dw.read = bar_read, \
+ .u.dw.write = bar_write, \
+ }
+
+#define CFG_FIELD_ROM(reg_offset) \
+ { \
+ .offset = reg_offset, \
+ .size = 4, \
+ .init = rom_init, \
+ .reset = bar_reset, \
+ .release = bar_release, \
+ .u.dw.read = bar_read, \
+ .u.dw.write = rom_write, \
+ }
static const struct config_field header_0[] = {
CFG_FIELD_BAR(PCI_BASE_ADDRESS_0),
diff --git a/drivers/xen/pciback/controller.c b/drivers/xen/pciback/controller.c
index 7f04f11..5a7e4cc 100644
--- a/drivers/xen/pciback/controller.c
+++ b/drivers/xen/pciback/controller.c
@@ -378,7 +378,7 @@ int pciback_publish_pci_roots(struct pciback_device *pdev,
}
err = xenbus_printf(XBT_NIL, pdev->xdev->nodename, str,
- "%lx", (sizeof(struct acpi_resource) * 2) + 1);
+ "%lx", (sizeof(struct acpi_resource) *2) + 1);
out:
spin_unlock(&dev_data->lock);
diff --git a/drivers/xen/pciback/pci_stub.c b/drivers/xen/pciback/pci_stub.c
index 88c7ca1..c8f6f29 100644
--- a/drivers/xen/pciback/pci_stub.c
+++ b/drivers/xen/pciback/pci_stub.c
@@ -13,7 +13,7 @@
#include <linux/pci.h>
#include <linux/wait.h>
#include <linux/sched.h>
-#include <asm/atomic.h>
+#include <linux/atomic.h>
#include <xen/events.h>
#include <asm/xen/pci.h>
#include <asm/xen/hypervisor.h>
@@ -603,7 +603,7 @@ static pci_ers_result_t common_process(struct pcistub_device *psdev,
if (test_bit(_XEN_PCIF_active,
(unsigned long *)&psdev->pdev->sh_info->flags)) {
dev_dbg(&psdev->dev->dev,
- "schedule pci_conf service in pciback \n");
+ "schedule pci_conf service in pciback\n");
test_and_schedule_op(psdev->pdev);
}
@@ -1055,7 +1055,8 @@ static ssize_t pcistub_irq_handler_show(struct device_driver *drv, char *buf)
if (!dev_data)
continue;
count +=
- scnprintf(buf + count, PAGE_SIZE - count, "%s:%s:%sing:%ld\n",
+ scnprintf(buf + count, PAGE_SIZE - count,
+ "%s:%s:%sing:%ld\n",
pci_name(psdev->dev),
dev_data->isr_on ? "on" : "off",
dev_data->ack_intr ? "ack" : "not ack",
diff --git a/drivers/xen/pciback/pciback.h b/drivers/xen/pciback/pciback.h
index fc31052..5c14020 100644
--- a/drivers/xen/pciback/pciback.h
+++ b/drivers/xen/pciback/pciback.h
@@ -12,7 +12,7 @@
#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/workqueue.h>
-#include <asm/atomic.h>
+#include <linux/atomic.h>
#include <xen/interface/io/pciif.h>
struct pci_dev_entry {
@@ -20,8 +20,8 @@ struct pci_dev_entry {
struct pci_dev *dev;
};
-#define _PDEVF_op_active (0)
-#define PDEVF_op_active (1<<(_PDEVF_op_active))
+#define _PDEVF_op_active (0)
+#define PDEVF_op_active (1<<(_PDEVF_op_active))
#define _PCIB_op_pending (1)
#define PCIB_op_pending (1<<(_PCIB_op_pending))
@@ -45,11 +45,11 @@ struct pciback_device {
struct pciback_dev_data {
struct list_head config_fields;
- unsigned int permissive : 1;
- unsigned int warned_on_write : 1;
- unsigned int enable_intx : 1;
- unsigned int isr_on : 1; /* Whether the IRQ handler is installed. */
- unsigned int ack_intr : 1; /* .. and ACK-ing */
+ unsigned int permissive:1;
+ unsigned int warned_on_write:1;
+ unsigned int enable_intx:1;
+ unsigned int isr_on:1; /* Whether the IRQ handler is installed. */
+ unsigned int ack_intr:1; /* .. and ACK-ing */
unsigned long handled;
unsigned int irq; /* Saved in case device transitions to MSI/MSI-X */
char irq_name[0]; /* pciback[000:04:00.0] */
diff --git a/drivers/xen/pciback/pciback_ops.c b/drivers/xen/pciback/pciback_ops.c
index 5543881..9a465e9 100644
--- a/drivers/xen/pciback/pciback_ops.c
+++ b/drivers/xen/pciback/pciback_ops.c
@@ -63,12 +63,11 @@ void pciback_control_isr(struct pci_dev *dev, int reset)
dev_data->irq_name, dev);
if (rc) {
dev_err(&dev->dev, "%s: failed to install fake IRQ " \
- "handler for IRQ %d! (rc:%d)\n", dev_data->irq_name,
- dev_data->irq, rc);
+ "handler for IRQ %d! (rc:%d)\n",
+ dev_data->irq_name, dev_data->irq, rc);
goto out;
}
- }
- else {
+ } else {
free_irq(dev_data->irq, dev);
dev_data->irq = 0;
}
@@ -188,7 +187,7 @@ void pciback_do_op(struct work_struct *data)
break;
#endif
default:
- op->err = XEN_PCI_ERR_not_implemented;
+ op->err = XEN_PCI_ERR_not_implemented;
break;
}
}
diff --git a/drivers/xen/pciback/xenbus.c b/drivers/xen/pciback/xenbus.c
index 993b659..70030c4 100644
--- a/drivers/xen/pciback/xenbus.c
+++ b/drivers/xen/pciback/xenbus.c
@@ -700,12 +700,12 @@ static const struct xenbus_device_id xenpci_ids[] = {
};
static struct xenbus_driver xenbus_pciback_driver = {
- .name = "pciback",
- .owner = THIS_MODULE,
- .ids = xenpci_ids,
- .probe = pciback_xenbus_probe,
- .remove = pciback_xenbus_remove,
- .otherend_changed = pciback_frontend_changed,
+ .name = "pciback",
+ .owner = THIS_MODULE,
+ .ids = xenpci_ids,
+ .probe = pciback_xenbus_probe,
+ .remove = pciback_xenbus_remove,
+ .otherend_changed = pciback_frontend_changed,
};
int __init pciback_xenbus_register(void)
@@ -713,7 +713,7 @@ int __init pciback_xenbus_register(void)
pciback_wq = create_workqueue("pciback_workqueue");
if (!pciback_wq) {
printk(KERN_ERR "%s: create"
- "pciback_workqueue failed\n",__FUNCTION__);
+ "pciback_workqueue failed\n", __func__);
return -EFAULT;
}
return xenbus_register_backend(&xenbus_pciback_driver);
--
1.7.4
From 83d24d8dbd9e52a7ac94deae2d9fff6681ce8761 Mon Sep 17 00:00:00 2001
From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Date: Mon, 13 Dec 2010 11:30:29 -0500
Subject: [PATCH 041/197] xen/xen-pciback: Swap over to DEFINE_PCI_DEVICE_TABLE
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
drivers/xen/pciback/pci_stub.c | 2 +-
1 files changed, 1 insertions(+), 1 deletions(-)
diff --git a/drivers/xen/pciback/pci_stub.c b/drivers/xen/pciback/pci_stub.c
index c8f6f29..09dd60c 100644
--- a/drivers/xen/pciback/pci_stub.c
+++ b/drivers/xen/pciback/pci_stub.c
@@ -497,7 +497,7 @@ static void pcistub_remove(struct pci_dev *dev)
}
}
-static const struct pci_device_id pcistub_ids[] = {
+DEFINE_PCI_DEVICE_TABLE(pcistub_ids) = {
{
.vendor = PCI_ANY_ID,
.device = PCI_ANY_ID,
--
1.7.4
From bd7ac7bbb2f43413bac67cff6536096094e0881a Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Date: Wed, 22 Dec 2010 13:09:40 -0800
Subject: [PATCH 042/197] xen: use normal virt_to_machine for ptes
We no longer support HIGHPTE allocations, so ptes should always be
within the kernel's direct map, and don't need pagetable walks
to convert to machine addresses.
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
arch/x86/xen/mmu.c | 10 +++++-----
1 files changed, 5 insertions(+), 5 deletions(-)
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index b38bfdc..dc18bea 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -610,7 +610,7 @@ void xen_set_domain_pte(pte_t *ptep, pte_t pteval, unsigned domid)
u = mcs.args;
/* ptep might be kmapped when using 32-bit HIGHPTE */
- u->ptr = arbitrary_virt_to_machine(ptep).maddr;
+ u->ptr = virt_to_machine(ptep).maddr;
u->val = pte_val_ma(pteval);
MULTI_mmu_update(mcs.mc, mcs.args, 1, NULL, domid);
@@ -656,7 +656,7 @@ static void xen_set_pmd_hyper(pmd_t *ptr, pmd_t val)
xen_mc_batch();
/* ptr may be ioremapped for 64-bit pagetable setup */
- u.ptr = arbitrary_virt_to_machine(ptr).maddr;
+ u.ptr = virt_to_machine(ptr).maddr;
u.val = pmd_val_ma(val);
xen_extend_mmu_update(&u);
@@ -739,7 +739,7 @@ void xen_ptep_modify_prot_commit(struct mm_struct *mm, unsigned long addr,
xen_mc_batch();
- u.ptr = arbitrary_virt_to_machine(ptep).maddr | MMU_PT_UPDATE_PRESERVE_AD;
+ u.ptr = virt_to_machine(ptep).maddr | MMU_PT_UPDATE_PRESERVE_AD;
u.val = pte_val_ma(pte);
xen_extend_mmu_update(&u);
@@ -904,7 +904,7 @@ static void xen_set_pud_hyper(pud_t *ptr, pud_t val)
xen_mc_batch();
/* ptr may be ioremapped for 64-bit pagetable setup */
- u.ptr = arbitrary_virt_to_machine(ptr).maddr;
+ u.ptr = virt_to_machine(ptr).maddr;
u.val = pud_val_ma(val);
xen_extend_mmu_update(&u);
@@ -2622,7 +2622,7 @@ static int remap_area_mfn_pte_fn(pte_t *ptep, pgtable_t token,
struct remap_data *rmd = data;
pte_t pte = pte_mkspecial(pfn_pte(rmd->mfn++, rmd->prot));
- rmd->mmu_update->ptr = arbitrary_virt_to_machine(ptep).maddr;
+ rmd->mmu_update->ptr = virt_to_machine(ptep).maddr;
rmd->mmu_update->val = pte_val_ma(pte);
rmd->mmu_update++;
--
1.7.4
From d3b33a21a502887d78a561de5c6f7ac1fbb31593 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Date: Thu, 16 Dec 2010 15:50:17 -0800
Subject: [PATCH 043/197] xen/mmu: remove all ad-hoc stats stuff
To make way for tracing.
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
arch/x86/xen/mmu.c | 137 ----------------------------------------------------
1 files changed, 0 insertions(+), 137 deletions(-)
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index dc18bea..54920a3 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -74,8 +74,6 @@
#include "mmu.h"
#include "debugfs.h"
-#define MMU_UPDATE_HISTO 30
-
/*
* Protects atomic reservation decrease/increase against concurrent increases.
* Also protects non-atomic updates of current_pages and driver_pages, and
@@ -83,53 +81,6 @@
*/
DEFINE_SPINLOCK(xen_reservation_lock);
-#ifdef CONFIG_XEN_DEBUG_FS
-
-static struct {
- u32 pgd_update;
- u32 pgd_update_pinned;
- u32 pgd_update_batched;
-
- u32 pud_update;
- u32 pud_update_pinned;
- u32 pud_update_batched;
-
- u32 pmd_update;
- u32 pmd_update_pinned;
- u32 pmd_update_batched;
-
- u32 pte_update;
- u32 pte_update_pinned;
- u32 pte_update_batched;
-
- u32 mmu_update;
- u32 mmu_update_extended;
- u32 mmu_update_histo[MMU_UPDATE_HISTO];
-
- u32 prot_commit;
- u32 prot_commit_batched;
-} mmu_stats;
-
-static u8 zero_stats;
-
-static inline void check_zero(void)
-{
- if (unlikely(zero_stats)) {
- memset(&mmu_stats, 0, sizeof(mmu_stats));
- zero_stats = 0;
- }
-}
-
-#define ADD_STATS(elem, val) \
- do { check_zero(); mmu_stats.elem += (val); } while(0)
-
-#else /* !CONFIG_XEN_DEBUG_FS */
-
-#define ADD_STATS(elem, val) do { (void)(val); } while(0)
-
-#endif /* CONFIG_XEN_DEBUG_FS */
-
-
/*
* Identity map, in addition to plain kernel map. This needs to be
* large enough to allocate page table pages to allocate the rest.
@@ -627,20 +578,10 @@ static void xen_extend_mmu_update(const struct mmu_update *update)
mcs = xen_mc_extend_args(__HYPERVISOR_mmu_update, sizeof(*u));
if (mcs.mc != NULL) {
- ADD_STATS(mmu_update_extended, 1);
- ADD_STATS(mmu_update_histo[mcs.mc->args[1]], -1);
-
mcs.mc->args[1]++;
-
- if (mcs.mc->args[1] < MMU_UPDATE_HISTO)
- ADD_STATS(mmu_update_histo[mcs.mc->args[1]], 1);
- else
- ADD_STATS(mmu_update_histo[0], 1);
} else {
- ADD_STATS(mmu_update, 1);
mcs = __xen_mc_entry(sizeof(*u));
MULTI_mmu_update(mcs.mc, mcs.args, 1, NULL, DOMID_SELF);
- ADD_STATS(mmu_update_histo[1], 1);
}
u = mcs.args;
@@ -660,8 +601,6 @@ static void xen_set_pmd_hyper(pmd_t *ptr, pmd_t val)
u.val = pmd_val_ma(val);
xen_extend_mmu_update(&u);
- ADD_STATS(pmd_update_batched, paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU);
-
xen_mc_issue(PARAVIRT_LAZY_MMU);
preempt_enable();
@@ -669,8 +608,6 @@ static void xen_set_pmd_hyper(pmd_t *ptr, pmd_t val)
static void xen_set_pmd(pmd_t *ptr, pmd_t val)
{
- ADD_STATS(pmd_update, 1);
-
/* If page is not pinned, we can just update the entry
directly */
if (!xen_page_pinned(ptr)) {
@@ -678,8 +615,6 @@ static void xen_set_pmd(pmd_t *ptr, pmd_t val)
return;
}
- ADD_STATS(pmd_update_pinned, 1);
-
xen_set_pmd_hyper(ptr, val);
}
@@ -712,9 +647,6 @@ static bool xen_batched_set_pte(pte_t *ptep, pte_t pteval)
static void xen_set_pte(pte_t *ptep, pte_t pteval)
{
- ADD_STATS(pte_update, 1);
-// ADD_STATS(pte_update_pinned, xen_page_pinned(ptep));
-
if (!xen_batched_set_pte(ptep, pteval))
native_set_pte(ptep, pteval);
}
@@ -743,9 +675,6 @@ void xen_ptep_modify_prot_commit(struct mm_struct *mm, unsigned long addr,
u.val = pte_val_ma(pte);
xen_extend_mmu_update(&u);
- ADD_STATS(prot_commit, 1);
- ADD_STATS(prot_commit_batched, paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU);
-
xen_mc_issue(PARAVIRT_LAZY_MMU);
}
@@ -908,8 +837,6 @@ static void xen_set_pud_hyper(pud_t *ptr, pud_t val)
u.val = pud_val_ma(val);
xen_extend_mmu_update(&u);
- ADD_STATS(pud_update_batched, paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU);
-
xen_mc_issue(PARAVIRT_LAZY_MMU);
preempt_enable();
@@ -917,8 +844,6 @@ static void xen_set_pud_hyper(pud_t *ptr, pud_t val)
static void xen_set_pud(pud_t *ptr, pud_t val)
{
- ADD_STATS(pud_update, 1);
-
/* If page is not pinned, we can just update the entry
directly */
if (!xen_page_pinned(ptr)) {
@@ -926,8 +851,6 @@ static void xen_set_pud(pud_t *ptr, pud_t val)
return;
}
- ADD_STATS(pud_update_pinned, 1);
-
xen_set_pud_hyper(ptr, val);
}
@@ -1020,8 +943,6 @@ static void xen_set_pgd(pgd_t *ptr, pgd_t val)
{
pgd_t *user_ptr = xen_get_user_pgd(ptr);
- ADD_STATS(pgd_update, 1);
-
/* If page is not pinned, we can just update the entry
directly */
if (!xen_page_pinned(ptr)) {
@@ -1033,9 +954,6 @@ static void xen_set_pgd(pgd_t *ptr, pgd_t val)
return;
}
- ADD_STATS(pgd_update_pinned, 1);
- ADD_STATS(pgd_update_batched, paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU);
-
/* If it's pinned, then we can at least batch the kernel and
user updates together. */
xen_mc_batch();
@@ -2674,58 +2592,3 @@ out:
return err;
}
EXPORT_SYMBOL_GPL(xen_remap_domain_mfn_range);
-
-#ifdef CONFIG_XEN_DEBUG_FS
-
-static struct dentry *d_mmu_debug;
-
-static int __init xen_mmu_debugfs(void)
-{
- struct dentry *d_xen = xen_init_debugfs();
-
- if (d_xen == NULL)
- return -ENOMEM;
-
- d_mmu_debug = debugfs_create_dir("mmu", d_xen);
-
- debugfs_create_u8("zero_stats", 0644, d_mmu_debug, &zero_stats);
-
- debugfs_create_u32("pgd_update", 0444, d_mmu_debug, &mmu_stats.pgd_update);
- debugfs_create_u32("pgd_update_pinned", 0444, d_mmu_debug,
- &mmu_stats.pgd_update_pinned);
- debugfs_create_u32("pgd_update_batched", 0444, d_mmu_debug,
- &mmu_stats.pgd_update_pinned);
-
- debugfs_create_u32("pud_update", 0444, d_mmu_debug, &mmu_stats.pud_update);
- debugfs_create_u32("pud_update_pinned", 0444, d_mmu_debug,
- &mmu_stats.pud_update_pinned);
- debugfs_create_u32("pud_update_batched", 0444, d_mmu_debug,
- &mmu_stats.pud_update_pinned);
-
- debugfs_create_u32("pmd_update", 0444, d_mmu_debug, &mmu_stats.pmd_update);
- debugfs_create_u32("pmd_update_pinned", 0444, d_mmu_debug,
- &mmu_stats.pmd_update_pinned);
- debugfs_create_u32("pmd_update_batched", 0444, d_mmu_debug,
- &mmu_stats.pmd_update_pinned);
-
- debugfs_create_u32("pte_update", 0444, d_mmu_debug, &mmu_stats.pte_update);
-// debugfs_create_u32("pte_update_pinned", 0444, d_mmu_debug,
-// &mmu_stats.pte_update_pinned);
- debugfs_create_u32("pte_update_batched", 0444, d_mmu_debug,
- &mmu_stats.pte_update_pinned);
-
- debugfs_create_u32("mmu_update", 0444, d_mmu_debug, &mmu_stats.mmu_update);
- debugfs_create_u32("mmu_update_extended", 0444, d_mmu_debug,
- &mmu_stats.mmu_update_extended);
- xen_debugfs_create_u32_array("mmu_update_histo", 0444, d_mmu_debug,
- mmu_stats.mmu_update_histo, 20);
-
- debugfs_create_u32("prot_commit", 0444, d_mmu_debug, &mmu_stats.prot_commit);
- debugfs_create_u32("prot_commit_batched", 0444, d_mmu_debug,
- &mmu_stats.prot_commit_batched);
-
- return 0;
-}
-fs_initcall(xen_mmu_debugfs);
-
-#endif /* CONFIG_XEN_DEBUG_FS */
--
1.7.4
From 105aad363797212fbd2a4c887b723407c5851175 Mon Sep 17 00:00:00 2001
From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Date: Thu, 9 Dec 2010 15:01:11 -0500
Subject: [PATCH 045/197] xen/irq: Don't fall over when nr_irqs_gsi > nr_irqs.
This scenario, where nr_irqs_gsi is greater than nr_irqs, is rather
strange, but let's still try to survive it. Make sure to print a
warning so the user isn't surprised if things don't work.
Solves a bootup-crash when booting Xen and Linux under QEMU.
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
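[Illustrative note] The survival strategy is simply to keep stepping the
search down from the top of IRQ space whenever irq_alloc_desc_at() reports
that a descriptor already exists, and to warn once the search eats into the
GSI/MSI range. A self-contained model of that loop; alloc_desc_at() fakes
irq_alloc_desc_at(), and the nr_irqs/bottom numbers are made-up examples:

#include <errno.h>
#include <stdio.h>

#define NR_IRQS_LEGACY  16

static int nr_irqs = 64;
static int bottom  = 32;        /* example stand-in for nr_irqs_gsi */

/* Fake irq_alloc_desc_at(): pretend descriptors 60..63 already exist. */
static int alloc_desc_at(int irq)
{
        return irq >= 60 ? -EEXIST : irq;
}

static int find_unbound_irq(void)
{
        int irq, res, top = nr_irqs - 1;

retry:
        irq = top;      /* the real code scans down from here for a free slot */
        if (irq <= bottom)
                return -1;

        res = alloc_desc_at(irq);
        if (res == -EEXIST) {
                top--;
                if (bottom > top)
                        printf("eating into GSI/MSI space (%d)\n", top);
                if (top > NR_IRQS_LEGACY)
                        goto retry;
                return -1;
        }
        return res;
}

int main(void)
{
        printf("allocated irq %d\n", find_unbound_irq());
        return 0;
}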
drivers/xen/events.c | 9 +++++++++
1 files changed, 9 insertions(+), 0 deletions(-)
diff --git a/drivers/xen/events.c b/drivers/xen/events.c
index 4d4a23d..98b7220 100644
--- a/drivers/xen/events.c
+++ b/drivers/xen/events.c
@@ -411,6 +411,7 @@ static int find_unbound_irq(void)
if (bottom == nr_irqs)
goto no_irqs;
+retry:
/* This loop starts from the top of IRQ space and goes down.
* We need this b/c if we have a PCI device in a Xen PV guest
* we do not have an IO-APIC (though the backend might have them)
@@ -434,6 +435,14 @@ static int find_unbound_irq(void)
goto no_irqs;
res = irq_alloc_desc_at(irq, -1);
+ if (res == -EEXIST) {
+ top--;
+ if (bottom > top)
+ printk(KERN_ERR "Eating in GSI/MSI space (%d)!" \
+ " Your PCI device might not work!\n", top);
+ if (top > NR_IRQS_LEGACY)
+ goto retry;
+ }
if (WARN_ON(res != irq))
return -1;
--
1.7.4
From bfb56cd0b9304ddc6cfb411315bf7e5fea3e8bc7 Mon Sep 17 00:00:00 2001
From: Ian Campbell <ian.campbell@citrix.com>
Date: Tue, 11 Jan 2011 17:20:13 +0000
Subject: [PATCH 046/197] xen: handle remapped IRQs when enabling a pcifront PCI device.
This is currently not an issue because we take pains to ensure that
the GSI-to-IRQ mapping is 1-1 in a PV guest and that regular event
channels do not clash. However, a subsequent patch is going to break
this 1-1 mapping.
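Roughly, the change reads the GSI from config space and then asks Xen for an
IRQ instead of assuming dev->irq already is the GSI. A standalone sketch of
that flow, with a fake config space, read_config_byte() standing in for
pci_read_config_byte() and map_gsi() for xen_allocate_pirq():

#include <stdio.h>

#define PCI_INTERRUPT_LINE 0x3c

/* Fake 256-byte config space; offset 0x3c holds the GSI. */
static unsigned char cfg_space[256] = { [PCI_INTERRUPT_LINE] = 11 };

static int read_config_byte(int where, unsigned char *val)
{
    *val = cfg_space[where];
    return 0;
}

/* Stand-in for xen_allocate_pirq(): the IRQ handed back need not equal the GSI. */
static int map_gsi(unsigned char gsi)
{
    return 64 + gsi;    /* pretend dynamic IRQs live above the GSI range */
}

int main(void)
{
    unsigned char gsi;
    int irq;

    if (read_config_byte(PCI_INTERRUPT_LINE, &gsi) < 0)
        return 1;

    irq = map_gsi(gsi);
    printf("GSI%d mapped to IRQ%d\n", gsi, irq);    /* 11 -> 75, not 1-1 */
    return 0;
}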
Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Cc: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
Cc: Jeremy Fitzhardinge <jeremy@goop.org>
---
arch/x86/pci/xen.c | 22 ++++++++++++++--------
1 files changed, 14 insertions(+), 8 deletions(-)
diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c
index 25cd4a0..2a12f3d 100644
--- a/arch/x86/pci/xen.c
+++ b/arch/x86/pci/xen.c
@@ -226,21 +226,27 @@ static int xen_pcifront_enable_irq(struct pci_dev *dev)
{
int rc;
int share = 1;
+ u8 gsi;
- dev_info(&dev->dev, "Xen PCI enabling IRQ: %d\n", dev->irq);
-
- if (dev->irq < 0)
- return -EINVAL;
+ rc = pci_read_config_byte(dev, PCI_INTERRUPT_LINE, &gsi);
+ if (rc < 0) {
+ dev_warn(&dev->dev, "Xen PCI: failed to read interrupt line: %d\n",
+ rc);
+ return rc;
+ }
- if (dev->irq < NR_IRQS_LEGACY)
+ if (gsi < NR_IRQS_LEGACY)
share = 0;
- rc = xen_allocate_pirq(dev->irq, share, "pcifront");
+ rc = xen_allocate_pirq(gsi, share, "pcifront");
if (rc < 0) {
- dev_warn(&dev->dev, "Xen PCI IRQ: %d, failed to register:%d\n",
- dev->irq, rc);
+ dev_warn(&dev->dev, "Xen PCI: failed to register GSI%d: %d\n",
+ gsi, rc);
return rc;
}
+
+ dev->irq = rc;
+ dev_info(&dev->dev, "Xen PCI mapped GSI%d to IRQ%d\n", gsi, dev->irq);
return 0;
}
--
1.7.4
From 80b3b503bba489dcbdd808c5dd50a6be3aa06949 Mon Sep 17 00:00:00 2001
From: Ian Campbell <ian.campbell@citrix.com>
Date: Tue, 11 Jan 2011 17:20:14 +0000
Subject: [PATCH 047/197] xen: events: move find_unbound_pirq inside CONFIG_PCI_MSI
The only caller is xen_allocate_pirq_msi, which is also under this
ifdef, so this fixes:
drivers/xen/events.c:377: warning: 'find_unbound_pirq' defined but not used
when CONFIG_PCI_MSI=n
Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Cc: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
Cc: Jeremy Fitzhardinge <jeremy@goop.org>
---
drivers/xen/events.c | 34 +++++++++++++++++-----------------
1 files changed, 17 insertions(+), 17 deletions(-)
diff --git a/drivers/xen/events.c b/drivers/xen/events.c
index 98b7220..ae8d45d 100644
--- a/drivers/xen/events.c
+++ b/drivers/xen/events.c
@@ -384,23 +384,6 @@ static int get_nr_hw_irqs(void)
return ret;
}
-static int find_unbound_pirq(int type)
-{
- int rc, i;
- struct physdev_get_free_pirq op_get_free_pirq;
- op_get_free_pirq.type = type;
-
- rc = HYPERVISOR_physdev_op(PHYSDEVOP_get_free_pirq, &op_get_free_pirq);
- if (!rc)
- return op_get_free_pirq.pirq;
-
- for (i = 0; i < nr_irqs; i++) {
- if (pirq_to_irq[i] < 0)
- return i;
- }
- return -1;
-}
-
static int find_unbound_irq(void)
{
struct irq_data *data;
@@ -683,6 +666,23 @@ out:
#include <linux/msi.h>
#include "../pci/msi.h"
+static int find_unbound_pirq(int type)
+{
+ int rc, i;
+ struct physdev_get_free_pirq op_get_free_pirq;
+ op_get_free_pirq.type = type;
+
+ rc = HYPERVISOR_physdev_op(PHYSDEVOP_get_free_pirq, &op_get_free_pirq);
+ if (!rc)
+ return op_get_free_pirq.pirq;
+
+ for (i = 0; i < nr_irqs; i++) {
+ if (pirq_to_irq[i] < 0)
+ return i;
+ }
+ return -1;
+}
+
void xen_allocate_pirq_msi(char *name, int *irq, int *pirq, int alloc)
{
spin_lock(&irq_mapping_update_lock);
--
1.7.4
From c514d00c80574e839d34c239363153b90bb8efcc Mon Sep 17 00:00:00 2001
From: Ian Campbell <ian.campbell@citrix.com>
Date: Tue, 11 Jan 2011 17:20:15 +0000
Subject: [PATCH 048/197] xen: events: add xen_allocate_irq_{dynamic, gsi} and xen_free_irq
This is neater than open-coded calls to irq_alloc_desc_at and
irq_free_desc.
No behavioural change is intended.
Note that we previously were not checking the return value of
irq_alloc_desc_at, which fails for GSI < NR_IRQS_LEGACY because the
core architecture code has already allocated those descriptors for
us. Hence the additional check against NR_IRQS_LEGACY in
xen_allocate_irq_gsi.
Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Cc: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
Cc: Jeremy Fitzhardinge <jeremy@goop.org>
---
drivers/xen/events.c | 53 +++++++++++++++++++++++++++++++++-----------------
1 files changed, 35 insertions(+), 18 deletions(-)
diff --git a/drivers/xen/events.c b/drivers/xen/events.c
index ae8d45d..74fb216 100644
--- a/drivers/xen/events.c
+++ b/drivers/xen/events.c
@@ -384,7 +384,7 @@ static int get_nr_hw_irqs(void)
return ret;
}
-static int find_unbound_irq(void)
+static int xen_allocate_irq_dynamic(void)
{
struct irq_data *data;
int irq, res;
@@ -442,6 +442,30 @@ static bool identity_mapped_irq(unsigned irq)
return irq < get_nr_hw_irqs();
}
+static int xen_allocate_irq_gsi(unsigned gsi)
+{
+ int irq;
+
+ if (!identity_mapped_irq(gsi) &&
+ (xen_initial_domain() || !xen_pv_domain()))
+ return xen_allocate_irq_dynamic();
+
+ /* Legacy IRQ descriptors are already allocated by the arch. */
+ if (gsi < NR_IRQS_LEGACY)
+ return gsi;
+
+ irq = irq_alloc_desc_at(gsi, -1);
+ if (irq < 0)
+ panic("Unable to allocate to IRQ%d (%d)\n", gsi, irq);
+
+ return irq;
+}
+
+static void xen_free_irq(unsigned irq)
+{
+ irq_free_desc(irq);
+}
+
static void pirq_unmask_notify(int irq)
{
struct physdev_eoi eoi = { .irq = pirq_from_irq(irq) };
@@ -627,14 +651,7 @@ int xen_map_pirq_gsi(unsigned pirq, unsigned gsi, int shareable, char *name)
goto out; /* XXX need refcount? */
}
- /* If we are a PV guest, we don't have GSIs (no ACPI passed). Therefore
- * we are using the !xen_initial_domain() to drop in the function.*/
- if (identity_mapped_irq(gsi) || (!xen_initial_domain() &&
- xen_pv_domain())) {
- irq = gsi;
- irq_alloc_desc_at(irq, -1);
- } else
- irq = find_unbound_irq();
+ irq = xen_allocate_irq_gsi(gsi);
set_irq_chip_and_handler_name(irq, &xen_pirq_chip,
handle_level_irq, name);
@@ -647,7 +664,7 @@ int xen_map_pirq_gsi(unsigned pirq, unsigned gsi, int shareable, char *name)
* this in the priv domain. */
if (xen_initial_domain() &&
HYPERVISOR_physdev_op(PHYSDEVOP_alloc_irq_vector, &irq_op)) {
- irq_free_desc(irq);
+ xen_free_irq(irq);
irq = -ENOSPC;
goto out;
}
@@ -688,7 +705,7 @@ void xen_allocate_pirq_msi(char *name, int *irq, int *pirq, int alloc)
spin_lock(&irq_mapping_update_lock);
if (alloc & XEN_ALLOC_IRQ) {
- *irq = find_unbound_irq();
+ *irq = xen_allocate_irq_dynamic();
if (*irq == -1)
goto out;
}
@@ -738,7 +755,7 @@ int xen_create_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int type)
spin_lock(&irq_mapping_update_lock);
- irq = find_unbound_irq();
+ irq = xen_allocate_irq_dynamic();
if (irq == -1)
goto out;
@@ -747,7 +764,7 @@ int xen_create_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int type)
if (rc) {
printk(KERN_WARNING "xen map irq failed %d\n", rc);
- irq_free_desc(irq);
+ xen_free_irq(irq);
irq = -1;
goto out;
@@ -789,7 +806,7 @@ int xen_destroy_irq(int irq)
}
irq_info[irq] = mk_unbound_info();
- irq_free_desc(irq);
+ xen_free_irq(irq);
out:
spin_unlock(&irq_mapping_update_lock);
@@ -820,7 +837,7 @@ int bind_evtchn_to_irq(unsigned int evtchn)
irq = evtchn_to_irq[evtchn];
if (irq == -1) {
- irq = find_unbound_irq();
+ irq = xen_allocate_irq_dynamic();
set_irq_chip_and_handler_name(irq, &xen_dynamic_chip,
handle_fasteoi_irq, "event");
@@ -845,7 +862,7 @@ static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu)
irq = per_cpu(ipi_to_irq, cpu)[ipi];
if (irq == -1) {
- irq = find_unbound_irq();
+ irq = xen_allocate_irq_dynamic();
if (irq < 0)
goto out;
@@ -881,7 +898,7 @@ int bind_virq_to_irq(unsigned int virq, unsigned int cpu)
irq = per_cpu(virq_to_irq, cpu)[virq];
if (irq == -1) {
- irq = find_unbound_irq();
+ irq = xen_allocate_irq_dynamic();
set_irq_chip_and_handler_name(irq, &xen_percpu_chip,
handle_percpu_irq, "virq");
@@ -940,7 +957,7 @@ static void unbind_from_irq(unsigned int irq)
if (irq_info[irq].type != IRQT_UNBOUND) {
irq_info[irq] = mk_unbound_info();
- irq_free_desc(irq);
+ xen_free_irq(irq);
}
spin_unlock(&irq_mapping_update_lock);
--
1.7.4
From 323430f2697268e6261b673fa2b86d6f3f3c7cff Mon Sep 17 00:00:00 2001
From: Ian Campbell <ian.campbell@citrix.com>
Date: Tue, 11 Jan 2011 17:20:16 +0000
Subject: [PATCH 049/197] xen: events: allocate GSIs and dynamic IRQs from separate IRQ ranges.
There are three cases we need to care about: PV guest, PV domain 0
and HVM guest.
The PV guest case is simple: it has no access to ACPI or real APICs
and therefore has no GSIs, so we simply allocate all IRQs
dynamically. The potentially interesting case here is PIRQ-type
event channels associated with passed-through PCI devices; however,
even in this case the guest has no direct interaction with the
physical GSI, since that happens in the PCI backend.
The PV domain 0 and HVM guest cases are actually the same. In the
domain 0 case the kernel sees the host ACPI and GSIs (although it
only sees the APIC indirectly via the hypervisor), and in the HVM
guest case it sees the virtualised ACPI and emulated APICs. In these
cases we start allocating dynamic IRQs at nr_irqs_gsi so that they
cannot clash with any GSI.
Currently xen_allocate_irq_dynamic starts at nr_irqs and works
backwards looking for a free IRQ, in order to (try and) avoid
clashing with GSIs used in domain 0 and in HVM guests. This change
avoids that, although we retain the behaviour of allowing dynamic
IRQs to encroach on the GSI range if no suitable IRQs are available,
since a future IRQ clash is deemed preferable to failing right now.
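The new strategy boils down to: start above the GSI range, and only fall back
into it when the space above is exhausted. A minimal standalone model (not
part of the patch), with alloc_desc_from() standing in for
irq_alloc_desc_from() and small constants standing in for nr_irqs and
nr_irqs_gsi:

#include <stdio.h>
#include <errno.h>

#define NR_IRQS         32    /* stand-in for nr_irqs */
#define NR_IRQS_GSI     24    /* stand-in for nr_irqs_gsi */
#define NR_IRQS_LEGACY  16

static int used[NR_IRQS];

/* Stand-in for irq_alloc_desc_from(): first free slot at or above 'from'. */
static int alloc_desc_from(int from)
{
    for (int i = from; i < NR_IRQS; i++) {
        if (!used[i]) {
            used[i] = 1;
            return i;
        }
    }
    return -ENOMEM;
}

/* Dynamic IRQs start above the GSI range; encroach on it only as a last resort. */
static int allocate_irq_dynamic(void)
{
    int first = NR_IRQS_GSI;
    int irq;

retry:
    irq = alloc_desc_from(first);
    if (irq == -ENOMEM && first > NR_IRQS_LEGACY) {
        fprintf(stderr, "out of dynamic IRQ space, eating into GSI space\n");
        first = NR_IRQS_LEGACY;
        goto retry;
    }
    return irq;
}

int main(void)
{
    for (int i = 0; i < 12; i++)
        printf("got IRQ %d\n", allocate_irq_dynamic());
    return 0;
}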
Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Cc: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
Cc: Jeremy Fitzhardinge <jeremy@goop.org>
---
drivers/xen/events.c | 84 +++++++++++++++----------------------------------
1 files changed, 26 insertions(+), 58 deletions(-)
diff --git a/drivers/xen/events.c b/drivers/xen/events.c
index 74fb216..a7b60f6 100644
--- a/drivers/xen/events.c
+++ b/drivers/xen/events.c
@@ -373,81 +373,49 @@ static void unmask_evtchn(int port)
put_cpu();
}
-static int get_nr_hw_irqs(void)
+static int xen_allocate_irq_dynamic(void)
{
- int ret = 1;
+ int first = 0;
+ int irq;
#ifdef CONFIG_X86_IO_APIC
- ret = get_nr_irqs_gsi();
+ /*
+ * For an HVM guest or domain 0 which see "real" (emulated or
+	 * actual respectively) GSIs we allocate dynamic IRQs
+ * e.g. those corresponding to event channels or MSIs
+ * etc. from the range above those "real" GSIs to avoid
+ * collisions.
+ */
+ if (xen_initial_domain() || xen_hvm_domain())
+ first = get_nr_irqs_gsi();
#endif
- return ret;
-}
-
-static int xen_allocate_irq_dynamic(void)
-{
- struct irq_data *data;
- int irq, res;
- int bottom = get_nr_hw_irqs();
- int top = nr_irqs-1;
-
- if (bottom == nr_irqs)
- goto no_irqs;
-
retry:
- /* This loop starts from the top of IRQ space and goes down.
- * We need this b/c if we have a PCI device in a Xen PV guest
- * we do not have an IO-APIC (though the backend might have them)
- * mapped in. To not have a collision of physical IRQs with the Xen
- * event channels start at the top of the IRQ space for virtual IRQs.
- */
- for (irq = top; irq > bottom; irq--) {
- data = irq_get_irq_data(irq);
- /* only 15->0 have init'd desc; handle irq > 16 */
- if (!data)
- break;
- if (data->chip == &no_irq_chip)
- break;
- if (data->chip != &xen_dynamic_chip)
- continue;
- if (irq_info[irq].type == IRQT_UNBOUND)
- return irq;
- }
+ irq = irq_alloc_desc_from(first, -1);
- if (irq == bottom)
- goto no_irqs;
-
- res = irq_alloc_desc_at(irq, -1);
- if (res == -EEXIST) {
- top--;
- if (bottom > top)
- printk(KERN_ERR "Eating in GSI/MSI space (%d)!" \
- " Your PCI device might not work!\n", top);
- if (top > NR_IRQS_LEGACY)
- goto retry;
+ if (irq == -ENOMEM && first > NR_IRQS_LEGACY) {
+ printk(KERN_ERR "Out of dynamic IRQ space and eating into GSI space. You should increase nr_irqs\n");
+ first = max(NR_IRQS_LEGACY, first - NR_IRQS_LEGACY);
+ goto retry;
}
- if (WARN_ON(res != irq))
- return -1;
+ if (irq < 0)
+ panic("No available IRQ to bind to: increase nr_irqs!\n");
return irq;
-
-no_irqs:
- panic("No available IRQ to bind to: increase nr_irqs!\n");
-}
-
-static bool identity_mapped_irq(unsigned irq)
-{
- /* identity map all the hardware irqs */
- return irq < get_nr_hw_irqs();
}
static int xen_allocate_irq_gsi(unsigned gsi)
{
int irq;
- if (!identity_mapped_irq(gsi) &&
- (xen_initial_domain() || !xen_pv_domain()))
+ /*
+ * A PV guest has no concept of a GSI (since it has no ACPI
+ * nor access to/knowledge of the physical APICs). Therefore
+ * all IRQs are dynamically allocated from the entire IRQ
+ * space.
+ */
+ if (xen_pv_domain() && !xen_initial_domain())
return xen_allocate_irq_dynamic();
/* Legacy IRQ descriptors are already allocated by the arch. */
--
1.7.4
From c986ab83cede3fc02d9f73c65dd83c20ebdf3d0e Mon Sep 17 00:00:00 2001
From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Date: Mon, 29 Nov 2010 13:52:18 -0500
Subject: [PATCH 050/197] ttm: Introduce a placeholder for DMA (bus) addresses.
For now this is limited to non-pool constructs only.
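The placeholder is simply a second array, allocated and freed alongside the
page directory. A minimal sketch of the shape this patch introduces (not the
actual TTM code), with calloc()/free() standing in for
drm_calloc_large()/drm_free_large():

#include <stdlib.h>

typedef unsigned long dma_addr_t;    /* userspace stand-in */

struct tt {
    unsigned long num_pages;
    void **pages;                /* one entry per page */
    dma_addr_t *dma_address;     /* parallel DMA (bus) addresses */
};

/* Allocate the page directory and the matching DMA address array together. */
static int tt_alloc_page_directory(struct tt *ttm)
{
    ttm->pages = calloc(ttm->num_pages, sizeof(*ttm->pages));
    ttm->dma_address = calloc(ttm->num_pages, sizeof(*ttm->dma_address));
    if (!ttm->pages || !ttm->dma_address)
        return -1;
    return 0;
}

static void tt_free_page_directory(struct tt *ttm)
{
    free(ttm->pages);
    ttm->pages = NULL;
    free(ttm->dma_address);
    ttm->dma_address = NULL;
}

int main(void)
{
    struct tt ttm = { .num_pages = 16 };

    if (tt_alloc_page_directory(&ttm))
        return 1;
    tt_free_page_directory(&ttm);
    return 0;
}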
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Tested-by: Ian Campbell <ian.campbell@citrix.com>
---
drivers/gpu/drm/ttm/ttm_page_alloc.c | 9 ++++++---
drivers/gpu/drm/ttm/ttm_tt.c | 10 ++++++++--
include/drm/ttm/ttm_bo_driver.h | 2 ++
include/drm/ttm/ttm_page_alloc.h | 8 ++++++--
4 files changed, 22 insertions(+), 7 deletions(-)
diff --git a/drivers/gpu/drm/ttm/ttm_page_alloc.c b/drivers/gpu/drm/ttm/ttm_page_alloc.c
index b1e02ff..6859288 100644
--- a/drivers/gpu/drm/ttm/ttm_page_alloc.c
+++ b/drivers/gpu/drm/ttm/ttm_page_alloc.c
@@ -38,6 +38,7 @@
#include <linux/mm.h>
#include <linux/seq_file.h> /* for seq_printf */
#include <linux/slab.h>
+#include <linux/dma-mapping.h>
#include <asm/atomic.h>
@@ -662,7 +663,8 @@ out:
* cached pages.
*/
int ttm_get_pages(struct list_head *pages, int flags,
- enum ttm_caching_state cstate, unsigned count)
+ enum ttm_caching_state cstate, unsigned count,
+ dma_addr_t *dma_address)
{
struct ttm_page_pool *pool = ttm_get_pool(flags, cstate);
struct page *p = NULL;
@@ -720,7 +722,7 @@ int ttm_get_pages(struct list_head *pages, int flags,
printk(KERN_ERR TTM_PFX
"Failed to allocate extra pages "
"for large request.");
- ttm_put_pages(pages, 0, flags, cstate);
+ ttm_put_pages(pages, 0, flags, cstate, NULL);
return r;
}
}
@@ -731,7 +733,8 @@ int ttm_get_pages(struct list_head *pages, int flags,
/* Put all pages in pages list to correct pool to wait for reuse */
void ttm_put_pages(struct list_head *pages, unsigned page_count, int flags,
- enum ttm_caching_state cstate)
+ enum ttm_caching_state cstate,
+ dma_addr_t *dma_address)
{
unsigned long irq_flags;
struct ttm_page_pool *pool = ttm_get_pool(flags, cstate);
diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c
index af789dc..0d39001 100644
--- a/drivers/gpu/drm/ttm/ttm_tt.c
+++ b/drivers/gpu/drm/ttm/ttm_tt.c
@@ -49,12 +49,16 @@ static int ttm_tt_swapin(struct ttm_tt *ttm);
static void ttm_tt_alloc_page_directory(struct ttm_tt *ttm)
{
ttm->pages = drm_calloc_large(ttm->num_pages, sizeof(*ttm->pages));
+ ttm->dma_address = drm_calloc_large(ttm->num_pages,
+ sizeof(*ttm->dma_address));
}
static void ttm_tt_free_page_directory(struct ttm_tt *ttm)
{
drm_free_large(ttm->pages);
ttm->pages = NULL;
+ drm_free_large(ttm->dma_address);
+ ttm->dma_address = NULL;
}
static void ttm_tt_free_user_pages(struct ttm_tt *ttm)
@@ -105,7 +109,8 @@ static struct page *__ttm_tt_get_page(struct ttm_tt *ttm, int index)
INIT_LIST_HEAD(&h);
- ret = ttm_get_pages(&h, ttm->page_flags, ttm->caching_state, 1);
+ ret = ttm_get_pages(&h, ttm->page_flags, ttm->caching_state, 1,
+ &ttm->dma_address[index]);
if (ret != 0)
return NULL;
@@ -298,7 +303,8 @@ static void ttm_tt_free_alloced_pages(struct ttm_tt *ttm)
count++;
}
}
- ttm_put_pages(&h, count, ttm->page_flags, ttm->caching_state);
+ ttm_put_pages(&h, count, ttm->page_flags, ttm->caching_state,
+ ttm->dma_address);
ttm->state = tt_unpopulated;
ttm->first_himem_page = ttm->num_pages;
ttm->last_lomem_page = -1;
diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h
index 8e0c848..6dc4fcc 100644
--- a/include/drm/ttm/ttm_bo_driver.h
+++ b/include/drm/ttm/ttm_bo_driver.h
@@ -149,6 +149,7 @@ enum ttm_caching_state {
* @swap_storage: Pointer to shmem struct file for swap storage.
* @caching_state: The current caching state of the pages.
* @state: The current binding state of the pages.
+ * @dma_address: The DMA (bus) addresses of the pages (if TTM_PAGE_FLAG_DMA32)
*
* This is a structure holding the pages, caching- and aperture binding
* status for a buffer object that isn't backed by fixed (VRAM / AGP)
@@ -173,6 +174,7 @@ struct ttm_tt {
tt_unbound,
tt_unpopulated,
} state;
+ dma_addr_t *dma_address;
};
#define TTM_MEMTYPE_FLAG_FIXED (1 << 0) /* Fixed (on-card) PCI memory */
diff --git a/include/drm/ttm/ttm_page_alloc.h b/include/drm/ttm/ttm_page_alloc.h
index 1168214..8062890 100644
--- a/include/drm/ttm/ttm_page_alloc.h
+++ b/include/drm/ttm/ttm_page_alloc.h
@@ -36,11 +36,13 @@
* @flags: ttm flags for page allocation.
* @cstate: ttm caching state for the page.
* @count: number of pages to allocate.
+ * @dma_address: The DMA (bus) address of pages (if TTM_PAGE_FLAG_DMA32 set).
*/
int ttm_get_pages(struct list_head *pages,
int flags,
enum ttm_caching_state cstate,
- unsigned count);
+ unsigned count,
+ dma_addr_t *dma_address);
/**
* Put linked list of pages to pool.
*
@@ -49,11 +51,13 @@ int ttm_get_pages(struct list_head *pages,
* count.
* @flags: ttm flags for page allocation.
* @cstate: ttm caching state.
+ * @dma_address: The DMA (bus) address of pages (if TTM_PAGE_FLAG_DMA32 set).
*/
void ttm_put_pages(struct list_head *pages,
unsigned page_count,
int flags,
- enum ttm_caching_state cstate);
+ enum ttm_caching_state cstate,
+ dma_addr_t *dma_address);
/**
* Initialize pool allocator.
*/
--
1.7.4
From ed805774fd3e3a8a0baa003d6419c5bd07d8cc5a Mon Sep 17 00:00:00 2001
From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Date: Mon, 29 Nov 2010 14:03:30 -0500
Subject: [PATCH 051/197] ttm: Utilize the dma_addr_t array for pages that are in the DMA32 pool.
We only use the "if (pool == NULL)" path for now.
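The decision the pool-less path now makes can be modelled in standalone C
(not the actual TTM code); coherent_alloc() and the plain malloc()/free()
calls below merely stand in for dma_alloc_coherent()/dma_free_coherent() and
alloc_page()/__free_page():

#include <stdio.h>
#include <stdlib.h>

#define TTM_PAGE_FLAG_DMA32 (1 << 0)

typedef unsigned long dma_addr_t;    /* userspace stand-in */

/* Stand-in for dma_alloc_coherent(): also records the "bus" address. */
static void *coherent_alloc(dma_addr_t *bus)
{
    void *p = malloc(4096);
    *bus = (dma_addr_t)p;
    return p;
}

static int get_pages(void **pages, int flags, unsigned count, dma_addr_t *dma_address)
{
    for (unsigned r = 0; r < count; r++) {
        /* Only take the coherent-DMA path when the caller asked for
         * DMA32 memory and gave us somewhere to record the bus address. */
        if ((flags & TTM_PAGE_FLAG_DMA32) && dma_address)
            pages[r] = coherent_alloc(&dma_address[r]);
        else
            pages[r] = malloc(4096);    /* would be alloc_page() */
        if (!pages[r])
            return -1;
    }
    return 0;
}

static void put_pages(void **pages, unsigned count, int flags, dma_addr_t *dma_address)
{
    for (unsigned r = 0; r < count; r++) {
        if ((flags & TTM_PAGE_FLAG_DMA32) && dma_address) {
            free(pages[r]);         /* would be dma_free_coherent() */
            dma_address[r] = 0;
        } else {
            free(pages[r]);         /* would be __free_page() */
        }
    }
}

int main(void)
{
    void *pages[4];
    dma_addr_t dma[4];

    if (get_pages(pages, TTM_PAGE_FLAG_DMA32, 4, dma))
        return 1;
    put_pages(pages, 4, TTM_PAGE_FLAG_DMA32, dma);
    return 0;
}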
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Tested-by: Ian Campbell <ian.campbell@citrix.com>
---
drivers/gpu/drm/ttm/ttm_page_alloc.c | 26 +++++++++++++++++++++++---
1 files changed, 23 insertions(+), 3 deletions(-)
diff --git a/drivers/gpu/drm/ttm/ttm_page_alloc.c b/drivers/gpu/drm/ttm/ttm_page_alloc.c
index 6859288..5d09677 100644
--- a/drivers/gpu/drm/ttm/ttm_page_alloc.c
+++ b/drivers/gpu/drm/ttm/ttm_page_alloc.c
@@ -683,14 +683,22 @@ int ttm_get_pages(struct list_head *pages, int flags,
gfp_flags |= GFP_HIGHUSER;
for (r = 0; r < count; ++r) {
- p = alloc_page(gfp_flags);
+ if ((flags & TTM_PAGE_FLAG_DMA32) && dma_address) {
+ void *addr;
+ addr = dma_alloc_coherent(NULL, PAGE_SIZE,
+ &dma_address[r],
+ gfp_flags);
+ if (addr == NULL)
+ return -ENOMEM;
+ p = virt_to_page(addr);
+ } else
+ p = alloc_page(gfp_flags);
if (!p) {
printk(KERN_ERR TTM_PFX
"Unable to allocate page.");
return -ENOMEM;
}
-
list_add(&p->lru, pages);
}
return 0;
@@ -739,12 +747,24 @@ void ttm_put_pages(struct list_head *pages, unsigned page_count, int flags,
unsigned long irq_flags;
struct ttm_page_pool *pool = ttm_get_pool(flags, cstate);
struct page *p, *tmp;
+ unsigned r;
if (pool == NULL) {
/* No pool for this memory type so free the pages */
+ r = page_count-1;
list_for_each_entry_safe(p, tmp, pages, lru) {
- __free_page(p);
+ if ((flags & TTM_PAGE_FLAG_DMA32) && dma_address) {
+ void *addr = page_address(p);
+ WARN_ON(!addr || !dma_address[r]);
+ if (addr)
+ dma_free_coherent(NULL, PAGE_SIZE,
+ addr,
+ dma_address[r]);
+ dma_address[r] = 0;
+ } else
+ __free_page(p);
+ r--;
}
/* Make the pages list empty */
INIT_LIST_HEAD(pages);
--
1.7.4
From c779160e3b0246d7de606eafb855df9b283a5c2a Mon Sep 17 00:00:00 2001
From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Date: Thu, 2 Dec 2010 10:24:13 -0500
Subject: [PATCH 052/197] ttm: Expand (*populate) to support an array of DMA addresses.
We pass in the array of ttm pages to be populated in the GART/MM
of the card (or AGP). The patch titled "ttm: Utilize the dma_addr_t
array for pages that are in the DMA32 pool." uses the DMA API to
give those pages proper DMA addresses (in the situation where
page_to_phys or virt_to_phys do not give us the DMA (bus) address).
Since we are using the DMA API on those pages, we should pass the
DMA addresses in to this function so it can save them in its proper
fields (later patches use them).
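The interface change amounts to threading one more array through the
populate hook so the backend can stash it next to the pages. A minimal
standalone sketch (plain void pointers and a toy backend stand in for
struct page and the real TTM backends):

#include <stdio.h>

typedef unsigned long dma_addr_t;    /* userspace stand-in */

struct backend;

/* The populate hook now receives the DMA addresses alongside the pages. */
struct backend_func {
    int (*populate)(struct backend *be, unsigned long num_pages,
                    void **pages, dma_addr_t *dma_addrs);
};

struct backend {
    const struct backend_func *func;
    unsigned long num_pages;
    void **pages;
    dma_addr_t *dma_addrs;    /* saved for use at bind time */
};

static int toy_populate(struct backend *be, unsigned long num_pages,
                        void **pages, dma_addr_t *dma_addrs)
{
    be->num_pages = num_pages;
    be->pages = pages;
    be->dma_addrs = dma_addrs;    /* keep the bus addresses for later */
    return 0;
}

static const struct backend_func toy_func = { .populate = toy_populate };

int main(void)
{
    void *pages[2] = { NULL, NULL };
    dma_addr_t dma[2] = { 0x1000, 0x2000 };
    struct backend be = { .func = &toy_func };

    be.func->populate(&be, 2, pages, dma);
    printf("first bus address: %#lx\n", be.dma_addrs[0]);
    return 0;
}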
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Tested-by: Ian Campbell <ian.campbell@citrix.com>
---
drivers/gpu/drm/nouveau/nouveau_sgdma.c | 3 ++-
drivers/gpu/drm/radeon/radeon_ttm.c | 3 ++-
drivers/gpu/drm/ttm/ttm_agp_backend.c | 3 ++-
drivers/gpu/drm/ttm/ttm_tt.c | 2 +-
drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c | 3 ++-
include/drm/ttm/ttm_bo_driver.h | 4 +++-
6 files changed, 12 insertions(+), 6 deletions(-)
diff --git a/drivers/gpu/drm/nouveau/nouveau_sgdma.c b/drivers/gpu/drm/nouveau/nouveau_sgdma.c
index 288baca..edc140a 100644
--- a/drivers/gpu/drm/nouveau/nouveau_sgdma.c
+++ b/drivers/gpu/drm/nouveau/nouveau_sgdma.c
@@ -20,7 +20,8 @@ struct nouveau_sgdma_be {
static int
nouveau_sgdma_populate(struct ttm_backend *be, unsigned long num_pages,
- struct page **pages, struct page *dummy_read_page)
+ struct page **pages, struct page *dummy_read_page,
+ dma_addr_t *dma_addrs)
{
struct nouveau_sgdma_be *nvbe = (struct nouveau_sgdma_be *)be;
struct drm_device *dev = nvbe->dev;
diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c
index 01c2c73..6f156e9 100644
--- a/drivers/gpu/drm/radeon/radeon_ttm.c
+++ b/drivers/gpu/drm/radeon/radeon_ttm.c
@@ -655,7 +655,8 @@ struct radeon_ttm_backend {
static int radeon_ttm_backend_populate(struct ttm_backend *backend,
unsigned long num_pages,
struct page **pages,
- struct page *dummy_read_page)
+ struct page *dummy_read_page,
+ dma_addr_t *dma_addrs)
{
struct radeon_ttm_backend *gtt;
diff --git a/drivers/gpu/drm/ttm/ttm_agp_backend.c b/drivers/gpu/drm/ttm/ttm_agp_backend.c
index f999e36..1c4a72f 100644
--- a/drivers/gpu/drm/ttm/ttm_agp_backend.c
+++ b/drivers/gpu/drm/ttm/ttm_agp_backend.c
@@ -47,7 +47,8 @@ struct ttm_agp_backend {
static int ttm_agp_populate(struct ttm_backend *backend,
unsigned long num_pages, struct page **pages,
- struct page *dummy_read_page)
+ struct page *dummy_read_page,
+ dma_addr_t *dma_addrs)
{
struct ttm_agp_backend *agp_be =
container_of(backend, struct ttm_agp_backend, backend);
diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c
index 0d39001..86d5b17 100644
--- a/drivers/gpu/drm/ttm/ttm_tt.c
+++ b/drivers/gpu/drm/ttm/ttm_tt.c
@@ -169,7 +169,7 @@ int ttm_tt_populate(struct ttm_tt *ttm)
}
be->func->populate(be, ttm->num_pages, ttm->pages,
- ttm->dummy_read_page);
+ ttm->dummy_read_page, ttm->dma_address);
ttm->state = tt_unbound;
return 0;
}
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c b/drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c
index 80bc37b..87e43e0 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c
@@ -102,7 +102,8 @@ struct vmw_ttm_backend {
static int vmw_ttm_populate(struct ttm_backend *backend,
unsigned long num_pages, struct page **pages,
- struct page *dummy_read_page)
+ struct page *dummy_read_page,
+ dma_addr_t *dma_addrs)
{
struct vmw_ttm_backend *vmw_be =
container_of(backend, struct vmw_ttm_backend, backend);
diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h
index 6dc4fcc..ebcd3dd 100644
--- a/include/drm/ttm/ttm_bo_driver.h
+++ b/include/drm/ttm/ttm_bo_driver.h
@@ -50,13 +50,15 @@ struct ttm_backend_func {
* @pages: Array of pointers to ttm pages.
* @dummy_read_page: Page to be used instead of NULL pages in the
* array @pages.
+ * @dma_addrs: Array of DMA (bus) address of the ttm pages.
*
* Populate the backend with ttm pages. Depending on the backend,
* it may or may not copy the @pages array.
*/
int (*populate) (struct ttm_backend *backend,
unsigned long num_pages, struct page **pages,
- struct page *dummy_read_page);
+ struct page *dummy_read_page,
+ dma_addr_t *dma_addrs);
/**
* struct ttm_backend_func member clear
*
--
1.7.4
From 340dec734021d8600029e8b3178280cc8d3af251 Mon Sep 17 00:00:00 2001
From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Date: Thu, 2 Dec 2010 11:04:29 -0500
Subject: [PATCH 053/197] radeon/ttm/PCIe: Use dma_addr if TTM has set it.
If the TTM layer has used the DMA API to set up pages that are
TTM_PAGE_FLAG_DMA32 (see the patch titled "ttm: Utilize the
dma_addr_t array for pages that are in the DMA32 pool."), let's use
those addresses when programming the GART in PCIe-type cards.
This patch skips the pci_map_page (and pci_unmap_page) calls if a
DMA address was passed in for that page. If the dma_address is zero
(or DMA_ERROR_CODE), we continue with our old behaviour.
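The bind/unbind bookkeeping can be sketched standalone: if TTM handed us a
valid address, record that fact and reuse it; otherwise map the page
ourselves, and on unbind only unmap what we mapped. In this sketch (not the
radeon code) map_page() stands in for pci_map_page() and the printf for
pci_unmap_page():

#include <stdio.h>
#include <stdbool.h>

typedef unsigned long dma_addr_t;    /* userspace stand-in */
#define DMA_ERROR_CODE ((dma_addr_t)0)

#define NUM_PAGES 4

static dma_addr_t pages_addr[NUM_PAGES];
static bool ttm_alloced[NUM_PAGES];

/* Stand-in for pci_map_page(): invent a bus address for the page index. */
static dma_addr_t map_page(int i)
{
    return 0x100000 + i * 0x1000;
}

static void gart_bind(const dma_addr_t *dma_addr)
{
    for (int i = 0; i < NUM_PAGES; i++) {
        if (dma_addr[i] != DMA_ERROR_CODE) {
            /* TTM already mapped it via the DMA API: reuse, don't remap. */
            ttm_alloced[i] = true;
            pages_addr[i] = dma_addr[i];
        } else {
            ttm_alloced[i] = false;
            pages_addr[i] = map_page(i);
        }
    }
}

static void gart_unbind(void)
{
    for (int i = 0; i < NUM_PAGES; i++) {
        if (!ttm_alloced[i])
            printf("unmapping %#lx\n", pages_addr[i]);    /* pci_unmap_page() */
        pages_addr[i] = 0;
    }
}

int main(void)
{
    /* First two pages come with TTM-provided addresses, last two do not. */
    dma_addr_t dma[NUM_PAGES] = { 0x5000, 0x6000, DMA_ERROR_CODE, DMA_ERROR_CODE };

    gart_bind(dma);
    gart_unbind();
    return 0;
}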
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Tested-by: Ian Campbell <ian.campbell@citrix.com>
---
drivers/gpu/drm/radeon/radeon.h | 4 ++-
drivers/gpu/drm/radeon/radeon_gart.c | 36 ++++++++++++++++++++++++---------
drivers/gpu/drm/radeon/radeon_ttm.c | 5 +++-
3 files changed, 33 insertions(+), 12 deletions(-)
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index 73f600d..c9bbab9 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -317,6 +317,7 @@ struct radeon_gart {
union radeon_gart_table table;
struct page **pages;
dma_addr_t *pages_addr;
+ bool *ttm_alloced;
bool ready;
};
@@ -329,7 +330,8 @@ void radeon_gart_fini(struct radeon_device *rdev);
void radeon_gart_unbind(struct radeon_device *rdev, unsigned offset,
int pages);
int radeon_gart_bind(struct radeon_device *rdev, unsigned offset,
- int pages, struct page **pagelist);
+ int pages, struct page **pagelist,
+ dma_addr_t *dma_addr);
/*
diff --git a/drivers/gpu/drm/radeon/radeon_gart.c b/drivers/gpu/drm/radeon/radeon_gart.c
index e65b903..4a5ac4b 100644
--- a/drivers/gpu/drm/radeon/radeon_gart.c
+++ b/drivers/gpu/drm/radeon/radeon_gart.c
@@ -149,8 +149,9 @@ void radeon_gart_unbind(struct radeon_device *rdev, unsigned offset,
p = t / (PAGE_SIZE / RADEON_GPU_PAGE_SIZE);
for (i = 0; i < pages; i++, p++) {
if (rdev->gart.pages[p]) {
- pci_unmap_page(rdev->pdev, rdev->gart.pages_addr[p],
- PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
+ if (!rdev->gart.ttm_alloced[p])
+ pci_unmap_page(rdev->pdev, rdev->gart.pages_addr[p],
+ PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
rdev->gart.pages[p] = NULL;
rdev->gart.pages_addr[p] = rdev->dummy_page.addr;
page_base = rdev->gart.pages_addr[p];
@@ -165,7 +166,7 @@ void radeon_gart_unbind(struct radeon_device *rdev, unsigned offset,
}
int radeon_gart_bind(struct radeon_device *rdev, unsigned offset,
- int pages, struct page **pagelist)
+ int pages, struct page **pagelist, dma_addr_t *dma_addr)
{
unsigned t;
unsigned p;
@@ -180,15 +181,22 @@ int radeon_gart_bind(struct radeon_device *rdev, unsigned offset,
p = t / (PAGE_SIZE / RADEON_GPU_PAGE_SIZE);
for (i = 0; i < pages; i++, p++) {
- /* we need to support large memory configurations */
- /* assume that unbind have already been call on the range */
- rdev->gart.pages_addr[p] = pci_map_page(rdev->pdev, pagelist[i],
+ /* On TTM path, we only use the DMA API if TTM_PAGE_FLAG_DMA32
+ * is requested. */
+ if (dma_addr[i] != DMA_ERROR_CODE) {
+ rdev->gart.ttm_alloced[p] = true;
+ rdev->gart.pages_addr[p] = dma_addr[i];
+ } else {
+ /* we need to support large memory configurations */
+ /* assume that unbind have already been call on the range */
+ rdev->gart.pages_addr[p] = pci_map_page(rdev->pdev, pagelist[i],
0, PAGE_SIZE,
PCI_DMA_BIDIRECTIONAL);
- if (pci_dma_mapping_error(rdev->pdev, rdev->gart.pages_addr[p])) {
- /* FIXME: failed to map page (return -ENOMEM?) */
- radeon_gart_unbind(rdev, offset, pages);
- return -ENOMEM;
+ if (pci_dma_mapping_error(rdev->pdev, rdev->gart.pages_addr[p])) {
+ /* FIXME: failed to map page (return -ENOMEM?) */
+ radeon_gart_unbind(rdev, offset, pages);
+ return -ENOMEM;
+ }
}
rdev->gart.pages[p] = pagelist[i];
page_base = rdev->gart.pages_addr[p];
@@ -251,6 +259,12 @@ int radeon_gart_init(struct radeon_device *rdev)
radeon_gart_fini(rdev);
return -ENOMEM;
}
+ rdev->gart.ttm_alloced = kzalloc(sizeof(bool) *
+ rdev->gart.num_cpu_pages, GFP_KERNEL);
+ if (rdev->gart.ttm_alloced == NULL) {
+ radeon_gart_fini(rdev);
+ return -ENOMEM;
+ }
/* set GART entry to point to the dummy page by default */
for (i = 0; i < rdev->gart.num_cpu_pages; i++) {
rdev->gart.pages_addr[i] = rdev->dummy_page.addr;
@@ -267,6 +281,8 @@ void radeon_gart_fini(struct radeon_device *rdev)
rdev->gart.ready = false;
kfree(rdev->gart.pages);
kfree(rdev->gart.pages_addr);
+ kfree(rdev->gart.ttm_alloced);
rdev->gart.pages = NULL;
rdev->gart.pages_addr = NULL;
+ rdev->gart.ttm_alloced = NULL;
}
diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c
index 6f156e9..ca04505 100644
--- a/drivers/gpu/drm/radeon/radeon_ttm.c
+++ b/drivers/gpu/drm/radeon/radeon_ttm.c
@@ -647,6 +647,7 @@ struct radeon_ttm_backend {
unsigned long num_pages;
struct page **pages;
struct page *dummy_read_page;
+ dma_addr_t *dma_addrs;
bool populated;
bool bound;
unsigned offset;
@@ -662,6 +663,7 @@ static int radeon_ttm_backend_populate(struct ttm_backend *backend,
gtt = container_of(backend, struct radeon_ttm_backend, backend);
gtt->pages = pages;
+ gtt->dma_addrs = dma_addrs;
gtt->num_pages = num_pages;
gtt->dummy_read_page = dummy_read_page;
gtt->populated = true;
@@ -674,6 +676,7 @@ static void radeon_ttm_backend_clear(struct ttm_backend *backend)
gtt = container_of(backend, struct radeon_ttm_backend, backend);
gtt->pages = NULL;
+ gtt->dma_addrs = NULL;
gtt->num_pages = 0;
gtt->dummy_read_page = NULL;
gtt->populated = false;
@@ -694,7 +697,7 @@ static int radeon_ttm_backend_bind(struct ttm_backend *backend,
gtt->num_pages, bo_mem, backend);
}
r = radeon_gart_bind(gtt->rdev, gtt->offset,
- gtt->num_pages, gtt->pages);
+ gtt->num_pages, gtt->pages, gtt->dma_addrs);
if (r) {
DRM_ERROR("failed to bind %lu pages at 0x%08X\n",
gtt->num_pages, gtt->offset);
--
1.7.4
From b4efe20150e5e9a483faf67ba0d947dbface96cd Mon Sep 17 00:00:00 2001
From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Date: Thu, 2 Dec 2010 11:36:24 -0500
Subject: [PATCH 054/197] nouveau/ttm/PCIe: Use dma_addr if TTM has set it.
If the TTM layer has used the DMA API to set up pages that are
TTM_PAGE_FLAG_DMA32 (see the patch titled "ttm: Utilize the
dma_addr_t array for pages that are in the DMA32 pool."), let's use
those addresses when programming the GART in PCIe-type cards.
This patch skips the pci_map_page (and pci_unmap_page) calls if a
DMA address was passed in for that page. If the dma_address is zero
(or DMA_ERROR_CODE), we continue with our old behaviour.
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Tested-by: Ian Campbell <ian.campbell@citrix.com>
---
drivers/gpu/drm/nouveau/nouveau_sgdma.c | 28 +++++++++++++++++++++-------
1 files changed, 21 insertions(+), 7 deletions(-)
diff --git a/drivers/gpu/drm/nouveau/nouveau_sgdma.c b/drivers/gpu/drm/nouveau/nouveau_sgdma.c
index edc140a..bbdd982 100644
--- a/drivers/gpu/drm/nouveau/nouveau_sgdma.c
+++ b/drivers/gpu/drm/nouveau/nouveau_sgdma.c
@@ -12,6 +12,7 @@ struct nouveau_sgdma_be {
struct drm_device *dev;
dma_addr_t *pages;
+ bool *ttm_alloced;
unsigned nr_pages;
unsigned pte_start;
@@ -35,15 +36,25 @@ nouveau_sgdma_populate(struct ttm_backend *be, unsigned long num_pages,
if (!nvbe->pages)
return -ENOMEM;
+ nvbe->ttm_alloced = kmalloc(sizeof(bool) * num_pages, GFP_KERNEL);
+ if (!nvbe->ttm_alloced)
+ return -ENOMEM;
+
nvbe->nr_pages = 0;
while (num_pages--) {
- nvbe->pages[nvbe->nr_pages] =
- pci_map_page(dev->pdev, pages[nvbe->nr_pages], 0,
+ if (dma_addrs[nvbe->nr_pages] != DMA_ERROR_CODE) {
+ nvbe->pages[nvbe->nr_pages] =
+ dma_addrs[nvbe->nr_pages];
+ nvbe->ttm_alloced[nvbe->nr_pages] = true;
+ } else {
+ nvbe->pages[nvbe->nr_pages] =
+ pci_map_page(dev->pdev, pages[nvbe->nr_pages], 0,
PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
- if (pci_dma_mapping_error(dev->pdev,
- nvbe->pages[nvbe->nr_pages])) {
- be->func->clear(be);
- return -EFAULT;
+ if (pci_dma_mapping_error(dev->pdev,
+ nvbe->pages[nvbe->nr_pages])) {
+ be->func->clear(be);
+ return -EFAULT;
+ }
}
nvbe->nr_pages++;
@@ -66,11 +77,14 @@ nouveau_sgdma_clear(struct ttm_backend *be)
be->func->unbind(be);
while (nvbe->nr_pages--) {
- pci_unmap_page(dev->pdev, nvbe->pages[nvbe->nr_pages],
+ if (!nvbe->ttm_alloced[nvbe->nr_pages])
+ pci_unmap_page(dev->pdev, nvbe->pages[nvbe->nr_pages],
PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
}
kfree(nvbe->pages);
+ kfree(nvbe->ttm_alloced);
nvbe->pages = NULL;
+ nvbe->ttm_alloced = NULL;
nvbe->nr_pages = 0;
}
}
--
1.7.4
From 03c4949992e2b7e84b7cdeb156d803db3f848b6c Mon Sep 17 00:00:00 2001
From: Ian Campbell <ian.campbell@citrix.com>
Date: Mon, 9 Feb 2009 12:05:52 -0800
Subject: [PATCH 058/197] xen: netback: Initial import of linux-2.6.18-xen.hg netback driver.
This corresponds to 774:107e10e0e07c in that tree.
Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
---
drivers/xen/Kconfig | 7 +
drivers/xen/Makefile | 1 +
drivers/xen/netback/Makefile | 3 +
drivers/xen/netback/common.h | 217 ++++++
drivers/xen/netback/interface.c | 336 ++++++++
drivers/xen/netback/netback.c | 1637 +++++++++++++++++++++++++++++++++++++++
drivers/xen/netback/xenbus.c | 454 +++++++++++
7 files changed, 2655 insertions(+), 0 deletions(-)
create mode 100644 drivers/xen/netback/Makefile
create mode 100644 drivers/xen/netback/common.h
create mode 100644 drivers/xen/netback/interface.c
create mode 100644 drivers/xen/netback/netback.c
create mode 100644 drivers/xen/netback/xenbus.c
diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig
index 5a48ce9..7e83d43 100644
--- a/drivers/xen/Kconfig
+++ b/drivers/xen/Kconfig
@@ -37,6 +37,13 @@ config XEN_BACKEND
depends on XEN_PCIDEV_BACKEND
+config XEN_NETDEV_BACKEND
+ bool "Xen backend network device"
+ depends on XEN_BACKEND && NET
+ help
+ Implement the network backend driver, which passes packets
+ from the guest domain's frontend drivers to the network.
+
config XENFS
tristate "Xen filesystem"
default y
diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile
index 533a199..c0e0509 100644
--- a/drivers/xen/Makefile
+++ b/drivers/xen/Makefile
@@ -9,6 +9,7 @@ obj-$(CONFIG_HOTPLUG_CPU) += cpu_hotplug.o
obj-$(CONFIG_XEN_DEV_EVTCHN) += xen-evtchn.o
obj-$(CONFIG_XEN_GNTDEV) += xen-gntdev.o
obj-$(CONFIG_XEN_PCIDEV_BACKEND) += pciback/
+obj-$(CONFIG_XEN_NETDEV_BACKEND) += netback/
obj-$(CONFIG_XENFS) += xenfs/
obj-$(CONFIG_XEN_SYS_HYPERVISOR) += sys-hypervisor.o
obj-$(CONFIG_XEN_PLATFORM_PCI) += platform-pci.o
diff --git a/drivers/xen/netback/Makefile b/drivers/xen/netback/Makefile
new file mode 100644
index 0000000..f4a0c51
--- /dev/null
+++ b/drivers/xen/netback/Makefile
@@ -0,0 +1,3 @@
+obj-$(CONFIG_XEN_NETDEV_BACKEND) := netbk.o
+
+netbk-y := netback.o xenbus.o interface.o
diff --git a/drivers/xen/netback/common.h b/drivers/xen/netback/common.h
new file mode 100644
index 0000000..9a54d57
--- /dev/null
+++ b/drivers/xen/netback/common.h
@@ -0,0 +1,217 @@
+/******************************************************************************
+ * arch/xen/drivers/netif/backend/common.h
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation; or, when distributed
+ * separately from the Linux kernel or incorporated into other
+ * software packages, subject to the following license:
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef __NETIF__BACKEND__COMMON_H__
+#define __NETIF__BACKEND__COMMON_H__
+
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/interrupt.h>
+#include <linux/slab.h>
+#include <linux/ip.h>
+#include <linux/in.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/wait.h>
+#include <xen/evtchn.h>
+#include <xen/interface/io/netif.h>
+#include <asm/io.h>
+#include <asm/pgalloc.h>
+#include <xen/interface/grant_table.h>
+#include <xen/gnttab.h>
+#include <xen/driver_util.h>
+#include <xen/xenbus.h>
+
+#define DPRINTK(_f, _a...) \
+ pr_debug("(file=%s, line=%d) " _f, \
+ __FILE__ , __LINE__ , ## _a )
+#define IPRINTK(fmt, args...) \
+ printk(KERN_INFO "xen_net: " fmt, ##args)
+#define WPRINTK(fmt, args...) \
+ printk(KERN_WARNING "xen_net: " fmt, ##args)
+
+typedef struct netif_st {
+ /* Unique identifier for this interface. */
+ domid_t domid;
+ unsigned int handle;
+
+ u8 fe_dev_addr[6];
+
+ /* Physical parameters of the comms window. */
+ grant_handle_t tx_shmem_handle;
+ grant_ref_t tx_shmem_ref;
+ grant_handle_t rx_shmem_handle;
+ grant_ref_t rx_shmem_ref;
+ unsigned int irq;
+
+ /* The shared rings and indexes. */
+ netif_tx_back_ring_t tx;
+ netif_rx_back_ring_t rx;
+ struct vm_struct *tx_comms_area;
+ struct vm_struct *rx_comms_area;
+
+ /* Set of features that can be turned on in dev->features. */
+ int features;
+
+ /* Internal feature information. */
+ u8 can_queue:1; /* can queue packets for receiver? */
+ u8 copying_receiver:1; /* copy packets to receiver? */
+
+ /* Allow netif_be_start_xmit() to peek ahead in the rx request ring. */
+ RING_IDX rx_req_cons_peek;
+
+ /* Transmit shaping: allow 'credit_bytes' every 'credit_usec'. */
+ unsigned long credit_bytes;
+ unsigned long credit_usec;
+ unsigned long remaining_credit;
+ struct timer_list credit_timeout;
+
+ /* Enforce draining of the transmit queue. */
+ struct timer_list tx_queue_timeout;
+
+ /* Miscellaneous private stuff. */
+ struct list_head list; /* scheduling list */
+ atomic_t refcnt;
+ struct net_device *dev;
+ struct net_device_stats stats;
+
+ unsigned int carrier;
+
+ wait_queue_head_t waiting_to_free;
+} netif_t;
+
+/*
+ * Implement our own carrier flag: the network stack's version causes delays
+ * when the carrier is re-enabled (in particular, dev_activate() may not
+ * immediately be called, which can cause packet loss; also the etherbridge
+ * can be rather lazy in activating its port).
+ */
+#define netback_carrier_on(netif) ((netif)->carrier = 1)
+#define netback_carrier_off(netif) ((netif)->carrier = 0)
+#define netback_carrier_ok(netif) ((netif)->carrier)
+
+enum {
+ NETBK_DONT_COPY_SKB,
+ NETBK_DELAYED_COPY_SKB,
+ NETBK_ALWAYS_COPY_SKB,
+};
+
+extern int netbk_copy_skb_mode;
+
+/* Function pointers into netback accelerator plugin modules */
+struct netback_accel_hooks {
+ struct module *owner;
+ int (*probe)(struct xenbus_device *dev);
+ int (*remove)(struct xenbus_device *dev);
+};
+
+/* Structure to track the state of a netback accelerator plugin */
+struct netback_accelerator {
+ struct list_head link;
+ int id;
+ char *eth_name;
+ atomic_t use_count;
+ struct netback_accel_hooks *hooks;
+};
+
+struct backend_info {
+ struct xenbus_device *dev;
+ netif_t *netif;
+ enum xenbus_state frontend_state;
+
+ /* State relating to the netback accelerator */
+ void *netback_accel_priv;
+ /* The accelerator that this backend is currently using */
+ struct netback_accelerator *accelerator;
+};
+
+#define NETBACK_ACCEL_VERSION 0x00010001
+
+/*
+ * Connect an accelerator plugin module to netback. Returns zero on
+ * success, < 0 on error, > 0 (with highest version number supported)
+ * if version mismatch.
+ */
+extern int netback_connect_accelerator(unsigned version,
+ int id, const char *eth_name,
+ struct netback_accel_hooks *hooks);
+/* Disconnect a previously connected accelerator plugin module */
+extern void netback_disconnect_accelerator(int id, const char *eth_name);
+
+
+extern
+void netback_probe_accelerators(struct backend_info *be,
+ struct xenbus_device *dev);
+extern
+void netback_remove_accelerators(struct backend_info *be,
+ struct xenbus_device *dev);
+extern
+void netif_accel_init(void);
+
+
+#define NET_TX_RING_SIZE __RING_SIZE((netif_tx_sring_t *)0, PAGE_SIZE)
+#define NET_RX_RING_SIZE __RING_SIZE((netif_rx_sring_t *)0, PAGE_SIZE)
+
+void netif_disconnect(netif_t *netif);
+
+netif_t *netif_alloc(domid_t domid, unsigned int handle);
+int netif_map(netif_t *netif, unsigned long tx_ring_ref,
+ unsigned long rx_ring_ref, unsigned int evtchn);
+
+#define netif_get(_b) (atomic_inc(&(_b)->refcnt))
+#define netif_put(_b) \
+ do { \
+ if ( atomic_dec_and_test(&(_b)->refcnt) ) \
+ wake_up(&(_b)->waiting_to_free); \
+ } while (0)
+
+void netif_xenbus_init(void);
+
+#define netif_schedulable(netif) \
+ (netif_running((netif)->dev) && netback_carrier_ok(netif))
+
+void netif_schedule_work(netif_t *netif);
+void netif_deschedule_work(netif_t *netif);
+
+int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev);
+struct net_device_stats *netif_be_get_stats(struct net_device *dev);
+irqreturn_t netif_be_int(int irq, void *dev_id, struct pt_regs *regs);
+
+static inline int netbk_can_queue(struct net_device *dev)
+{
+ netif_t *netif = netdev_priv(dev);
+ return netif->can_queue;
+}
+
+static inline int netbk_can_sg(struct net_device *dev)
+{
+ netif_t *netif = netdev_priv(dev);
+ return netif->features & NETIF_F_SG;
+}
+
+#endif /* __NETIF__BACKEND__COMMON_H__ */
diff --git a/drivers/xen/netback/interface.c b/drivers/xen/netback/interface.c
new file mode 100644
index 0000000..7e67941
--- /dev/null
+++ b/drivers/xen/netback/interface.c
@@ -0,0 +1,336 @@
+/******************************************************************************
+ * arch/xen/drivers/netif/backend/interface.c
+ *
+ * Network-device interface management.
+ *
+ * Copyright (c) 2004-2005, Keir Fraser
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation; or, when distributed
+ * separately from the Linux kernel or incorporated into other
+ * software packages, subject to the following license:
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "common.h"
+#include <linux/ethtool.h>
+#include <linux/rtnetlink.h>
+
+/*
+ * Module parameter 'queue_length':
+ *
+ * Enables queuing in the network stack when a client has run out of receive
+ * descriptors. Although this feature can improve receive bandwidth by avoiding
+ * packet loss, it can also result in packets sitting in the 'tx_queue' for
+ * unbounded time. This is bad if those packets hold onto foreign resources.
+ * For example, consider a packet that holds onto resources belonging to the
+ * guest for which it is queued (e.g., packet received on vif1.0, destined for
+ * vif1.1 which is not activated in the guest): in this situation the guest
+ * will never be destroyed, unless vif1.1 is taken down. To avoid this, we
+ * run a timer (tx_queue_timeout) to drain the queue when the interface is
+ * blocked.
+ */
+static unsigned long netbk_queue_length = 32;
+module_param_named(queue_length, netbk_queue_length, ulong, 0);
+
+static void __netif_up(netif_t *netif)
+{
+ enable_irq(netif->irq);
+ netif_schedule_work(netif);
+}
+
+static void __netif_down(netif_t *netif)
+{
+ disable_irq(netif->irq);
+ netif_deschedule_work(netif);
+}
+
+static int net_open(struct net_device *dev)
+{
+ netif_t *netif = netdev_priv(dev);
+ if (netback_carrier_ok(netif)) {
+ __netif_up(netif);
+ netif_start_queue(dev);
+ }
+ return 0;
+}
+
+static int net_close(struct net_device *dev)
+{
+ netif_t *netif = netdev_priv(dev);
+ if (netback_carrier_ok(netif))
+ __netif_down(netif);
+ netif_stop_queue(dev);
+ return 0;
+}
+
+static int netbk_change_mtu(struct net_device *dev, int mtu)
+{
+ int max = netbk_can_sg(dev) ? 65535 - ETH_HLEN : ETH_DATA_LEN;
+
+ if (mtu > max)
+ return -EINVAL;
+ dev->mtu = mtu;
+ return 0;
+}
+
+static int netbk_set_sg(struct net_device *dev, u32 data)
+{
+ if (data) {
+ netif_t *netif = netdev_priv(dev);
+
+ if (!(netif->features & NETIF_F_SG))
+ return -ENOSYS;
+ }
+
+ return ethtool_op_set_sg(dev, data);
+}
+
+static int netbk_set_tso(struct net_device *dev, u32 data)
+{
+ if (data) {
+ netif_t *netif = netdev_priv(dev);
+
+ if (!(netif->features & NETIF_F_TSO))
+ return -ENOSYS;
+ }
+
+ return ethtool_op_set_tso(dev, data);
+}
+
+static struct ethtool_ops network_ethtool_ops =
+{
+ .get_tx_csum = ethtool_op_get_tx_csum,
+ .set_tx_csum = ethtool_op_set_tx_csum,
+ .get_sg = ethtool_op_get_sg,
+ .set_sg = netbk_set_sg,
+ .get_tso = ethtool_op_get_tso,
+ .set_tso = netbk_set_tso,
+ .get_link = ethtool_op_get_link,
+};
+
+netif_t *netif_alloc(domid_t domid, unsigned int handle)
+{
+ int err = 0;
+ struct net_device *dev;
+ netif_t *netif;
+ char name[IFNAMSIZ] = {};
+
+ snprintf(name, IFNAMSIZ - 1, "vif%u.%u", domid, handle);
+ dev = alloc_netdev(sizeof(netif_t), name, ether_setup);
+ if (dev == NULL) {
+ DPRINTK("Could not create netif: out of memory\n");
+ return ERR_PTR(-ENOMEM);
+ }
+
+ netif = netdev_priv(dev);
+ memset(netif, 0, sizeof(*netif));
+ netif->domid = domid;
+ netif->handle = handle;
+ atomic_set(&netif->refcnt, 1);
+ init_waitqueue_head(&netif->waiting_to_free);
+ netif->dev = dev;
+
+ netback_carrier_off(netif);
+
+ netif->credit_bytes = netif->remaining_credit = ~0UL;
+ netif->credit_usec = 0UL;
+ init_timer(&netif->credit_timeout);
+ /* Initialize 'expires' now: it's used to track the credit window. */
+ netif->credit_timeout.expires = jiffies;
+
+ init_timer(&netif->tx_queue_timeout);
+
+ dev->hard_start_xmit = netif_be_start_xmit;
+ dev->get_stats = netif_be_get_stats;
+ dev->open = net_open;
+ dev->stop = net_close;
+ dev->change_mtu = netbk_change_mtu;
+ dev->features = NETIF_F_IP_CSUM;
+
+ SET_ETHTOOL_OPS(dev, &network_ethtool_ops);
+
+ dev->tx_queue_len = netbk_queue_length;
+
+ /*
+ * Initialise a dummy MAC address. We choose the numerically
+ * largest non-broadcast address to prevent the address getting
+ * stolen by an Ethernet bridge for STP purposes.
+ * (FE:FF:FF:FF:FF:FF)
+ */
+ memset(dev->dev_addr, 0xFF, ETH_ALEN);
+ dev->dev_addr[0] &= ~0x01;
+
+ rtnl_lock();
+ err = register_netdevice(dev);
+ rtnl_unlock();
+ if (err) {
+ DPRINTK("Could not register new net device %s: err=%d\n",
+ dev->name, err);
+ free_netdev(dev);
+ return ERR_PTR(err);
+ }
+
+ DPRINTK("Successfully created netif\n");
+ return netif;
+}
+
+static int map_frontend_pages(
+ netif_t *netif, grant_ref_t tx_ring_ref, grant_ref_t rx_ring_ref)
+{
+ struct gnttab_map_grant_ref op;
+
+ gnttab_set_map_op(&op, (unsigned long)netif->tx_comms_area->addr,
+ GNTMAP_host_map, tx_ring_ref, netif->domid);
+
+ if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1))
+ BUG();
+
+ if (op.status) {
+ DPRINTK(" Gnttab failure mapping tx_ring_ref!\n");
+ return op.status;
+ }
+
+ netif->tx_shmem_ref = tx_ring_ref;
+ netif->tx_shmem_handle = op.handle;
+
+ gnttab_set_map_op(&op, (unsigned long)netif->rx_comms_area->addr,
+ GNTMAP_host_map, rx_ring_ref, netif->domid);
+
+ if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1))
+ BUG();
+
+ if (op.status) {
+ DPRINTK(" Gnttab failure mapping rx_ring_ref!\n");
+ return op.status;
+ }
+
+ netif->rx_shmem_ref = rx_ring_ref;
+ netif->rx_shmem_handle = op.handle;
+
+ return 0;
+}
+
+static void unmap_frontend_pages(netif_t *netif)
+{
+ struct gnttab_unmap_grant_ref op;
+
+ gnttab_set_unmap_op(&op, (unsigned long)netif->tx_comms_area->addr,
+ GNTMAP_host_map, netif->tx_shmem_handle);
+
+ if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1))
+ BUG();
+
+ gnttab_set_unmap_op(&op, (unsigned long)netif->rx_comms_area->addr,
+ GNTMAP_host_map, netif->rx_shmem_handle);
+
+ if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1))
+ BUG();
+}
+
+int netif_map(netif_t *netif, unsigned long tx_ring_ref,
+ unsigned long rx_ring_ref, unsigned int evtchn)
+{
+ int err = -ENOMEM;
+ netif_tx_sring_t *txs;
+ netif_rx_sring_t *rxs;
+
+ /* Already connected through? */
+ if (netif->irq)
+ return 0;
+
+ netif->tx_comms_area = alloc_vm_area(PAGE_SIZE);
+ if (netif->tx_comms_area == NULL)
+ return -ENOMEM;
+ netif->rx_comms_area = alloc_vm_area(PAGE_SIZE);
+ if (netif->rx_comms_area == NULL)
+ goto err_rx;
+
+ err = map_frontend_pages(netif, tx_ring_ref, rx_ring_ref);
+ if (err)
+ goto err_map;
+
+ err = bind_interdomain_evtchn_to_irqhandler(
+ netif->domid, evtchn, netif_be_int, 0,
+ netif->dev->name, netif);
+ if (err < 0)
+ goto err_hypervisor;
+ netif->irq = err;
+ disable_irq(netif->irq);
+
+ txs = (netif_tx_sring_t *)netif->tx_comms_area->addr;
+ BACK_RING_INIT(&netif->tx, txs, PAGE_SIZE);
+
+ rxs = (netif_rx_sring_t *)
+ ((char *)netif->rx_comms_area->addr);
+ BACK_RING_INIT(&netif->rx, rxs, PAGE_SIZE);
+
+ netif->rx_req_cons_peek = 0;
+
+ netif_get(netif);
+
+ rtnl_lock();
+ netback_carrier_on(netif);
+ if (netif_running(netif->dev))
+ __netif_up(netif);
+ rtnl_unlock();
+
+ return 0;
+err_hypervisor:
+ unmap_frontend_pages(netif);
+err_map:
+ free_vm_area(netif->rx_comms_area);
+err_rx:
+ free_vm_area(netif->tx_comms_area);
+ return err;
+}
+
+void netif_disconnect(netif_t *netif)
+{
+ if (netback_carrier_ok(netif)) {
+ rtnl_lock();
+ netback_carrier_off(netif);
+ netif_carrier_off(netif->dev); /* discard queued packets */
+ if (netif_running(netif->dev))
+ __netif_down(netif);
+ rtnl_unlock();
+ netif_put(netif);
+ }
+
+ atomic_dec(&netif->refcnt);
+ wait_event(netif->waiting_to_free, atomic_read(&netif->refcnt) == 0);
+
+ del_timer_sync(&netif->credit_timeout);
+ del_timer_sync(&netif->tx_queue_timeout);
+
+ if (netif->irq)
+ unbind_from_irqhandler(netif->irq, netif);
+
+ unregister_netdev(netif->dev);
+
+ if (netif->tx.sring) {
+ unmap_frontend_pages(netif);
+ free_vm_area(netif->tx_comms_area);
+ free_vm_area(netif->rx_comms_area);
+ }
+
+ free_netdev(netif->dev);
+}
diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
new file mode 100644
index 0000000..db629d4
--- /dev/null
+++ b/drivers/xen/netback/netback.c
@@ -0,0 +1,1637 @@
+/******************************************************************************
+ * drivers/xen/netback/netback.c
+ *
+ * Back-end of the driver for virtual network devices. This portion of the
+ * driver exports a 'unified' network-device interface that can be accessed
+ * by any operating system that implements a compatible front end. A
+ * reference front-end implementation can be found in:
+ * drivers/xen/netfront/netfront.c
+ *
+ * Copyright (c) 2002-2005, K A Fraser
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation; or, when distributed
+ * separately from the Linux kernel or incorporated into other
+ * software packages, subject to the following license:
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "common.h"
+#include <xen/balloon.h>
+#include <xen/interface/memory.h>
+
+/*define NETBE_DEBUG_INTERRUPT*/
+
+struct netbk_rx_meta {
+ skb_frag_t frag;
+ int id;
+ u8 copy:1;
+};
+
+struct netbk_tx_pending_inuse {
+ struct list_head list;
+ unsigned long alloc_time;
+};
+
+static void netif_idx_release(u16 pending_idx);
+static void make_tx_response(netif_t *netif,
+ netif_tx_request_t *txp,
+ s8 st);
+static netif_rx_response_t *make_rx_response(netif_t *netif,
+ u16 id,
+ s8 st,
+ u16 offset,
+ u16 size,
+ u16 flags);
+
+static void net_tx_action(unsigned long unused);
+static DECLARE_TASKLET(net_tx_tasklet, net_tx_action, 0);
+
+static void net_rx_action(unsigned long unused);
+static DECLARE_TASKLET(net_rx_tasklet, net_rx_action, 0);
+
+static struct timer_list net_timer;
+static struct timer_list netbk_tx_pending_timer;
+
+#define MAX_PENDING_REQS 256
+
+static struct sk_buff_head rx_queue;
+
+static struct page **mmap_pages;
+static inline unsigned long idx_to_pfn(unsigned int idx)
+{
+ return page_to_pfn(mmap_pages[idx]);
+}
+
+static inline unsigned long idx_to_kaddr(unsigned int idx)
+{
+ return (unsigned long)pfn_to_kaddr(idx_to_pfn(idx));
+}
+
+/* extra field used in struct page */
+static inline void netif_set_page_index(struct page *pg, unsigned int index)
+{
+ *(unsigned long *)&pg->mapping = index;
+}
+
+static inline int netif_page_index(struct page *pg)
+{
+ unsigned long idx = (unsigned long)pg->mapping;
+
+ if (!PageForeign(pg))
+ return -1;
+
+ if ((idx >= MAX_PENDING_REQS) || (mmap_pages[idx] != pg))
+ return -1;
+
+ return idx;
+}
+
+#define PKT_PROT_LEN 64
+
+static struct pending_tx_info {
+ netif_tx_request_t req;
+ netif_t *netif;
+} pending_tx_info[MAX_PENDING_REQS];
+static u16 pending_ring[MAX_PENDING_REQS];
+typedef unsigned int PEND_RING_IDX;
+#define MASK_PEND_IDX(_i) ((_i)&(MAX_PENDING_REQS-1))
+static PEND_RING_IDX pending_prod, pending_cons;
+#define NR_PENDING_REQS (MAX_PENDING_REQS - pending_prod + pending_cons)
+
+/* Freed TX SKBs get batched on this ring before return to pending_ring. */
+static u16 dealloc_ring[MAX_PENDING_REQS];
+static PEND_RING_IDX dealloc_prod, dealloc_cons;
+
+/* Doubly-linked list of in-use pending entries. */
+static struct netbk_tx_pending_inuse pending_inuse[MAX_PENDING_REQS];
+static LIST_HEAD(pending_inuse_head);
+
+static struct sk_buff_head tx_queue;
+
+static grant_handle_t grant_tx_handle[MAX_PENDING_REQS];
+static gnttab_unmap_grant_ref_t tx_unmap_ops[MAX_PENDING_REQS];
+static gnttab_map_grant_ref_t tx_map_ops[MAX_PENDING_REQS];
+
+static struct list_head net_schedule_list;
+static spinlock_t net_schedule_list_lock;
+
+#define MAX_MFN_ALLOC 64
+static unsigned long mfn_list[MAX_MFN_ALLOC];
+static unsigned int alloc_index = 0;
+
+/* Setting this allows the safe use of this driver without netloop. */
+static int MODPARM_copy_skb = 1;
+module_param_named(copy_skb, MODPARM_copy_skb, bool, 0);
+MODULE_PARM_DESC(copy_skb, "Copy data received from netfront without netloop");
+
+int netbk_copy_skb_mode;
+
+static inline unsigned long alloc_mfn(void)
+{
+ BUG_ON(alloc_index == 0);
+ return mfn_list[--alloc_index];
+}
+
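+/*
+ * Ensure at least @nr machine frames are available in mfn_list, topping it
+ * up via XENMEM_increase_reservation if necessary.  Only the page-flipping
+ * receive path consumes these frames (see netbk_gop_frag()).
+ */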
+static int check_mfn(int nr)
+{
+ struct xen_memory_reservation reservation = {
+ .extent_order = 0,
+ .domid = DOMID_SELF
+ };
+ int rc;
+
+ if (likely(alloc_index >= nr))
+ return 0;
+
+ set_xen_guest_handle(reservation.extent_start, mfn_list + alloc_index);
+ reservation.nr_extents = MAX_MFN_ALLOC - alloc_index;
+ rc = HYPERVISOR_memory_op(XENMEM_increase_reservation, &reservation);
+ if (likely(rc > 0))
+ alloc_index += rc;
+
+ return alloc_index >= nr ? 0 : -ENOMEM;
+}
+
+static inline void maybe_schedule_tx_action(void)
+{
+ smp_mb();
+ if ((NR_PENDING_REQS < (MAX_PENDING_REQS/2)) &&
+ !list_empty(&net_schedule_list))
+ tasklet_schedule(&net_tx_tasklet);
+}
+
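+/*
+ * Make a private copy of an skb: the linear head is copied into a new skb
+ * and the remaining data into freshly allocated page fragments, so the
+ * result holds no references to the original packet's pages.
+ */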
+static struct sk_buff *netbk_copy_skb(struct sk_buff *skb)
+{
+ struct skb_shared_info *ninfo;
+ struct sk_buff *nskb;
+ unsigned long offset;
+ int ret;
+ int len;
+ int headlen;
+
+ BUG_ON(skb_shinfo(skb)->frag_list != NULL);
+
+ nskb = alloc_skb(SKB_MAX_HEAD(0), GFP_ATOMIC | __GFP_NOWARN);
+ if (unlikely(!nskb))
+ goto err;
+
+ skb_reserve(nskb, 16 + NET_IP_ALIGN);
+ headlen = nskb->end - nskb->data;
+ if (headlen > skb_headlen(skb))
+ headlen = skb_headlen(skb);
+ ret = skb_copy_bits(skb, 0, __skb_put(nskb, headlen), headlen);
+ BUG_ON(ret);
+
+ ninfo = skb_shinfo(nskb);
+ ninfo->gso_size = skb_shinfo(skb)->gso_size;
+ ninfo->gso_type = skb_shinfo(skb)->gso_type;
+
+ offset = headlen;
+ len = skb->len - headlen;
+
+ nskb->len = skb->len;
+ nskb->data_len = len;
+ nskb->truesize += len;
+
+ while (len) {
+ struct page *page;
+ int copy;
+ int zero;
+
+ if (unlikely(ninfo->nr_frags >= MAX_SKB_FRAGS)) {
+ dump_stack();
+ goto err_free;
+ }
+
+ copy = len >= PAGE_SIZE ? PAGE_SIZE : len;
+ zero = len >= PAGE_SIZE ? 0 : __GFP_ZERO;
+
+ page = alloc_page(GFP_ATOMIC | __GFP_NOWARN | zero);
+ if (unlikely(!page))
+ goto err_free;
+
+ ret = skb_copy_bits(skb, offset, page_address(page), copy);
+ BUG_ON(ret);
+
+ ninfo->frags[ninfo->nr_frags].page = page;
+ ninfo->frags[ninfo->nr_frags].page_offset = 0;
+ ninfo->frags[ninfo->nr_frags].size = copy;
+ ninfo->nr_frags++;
+
+ offset += copy;
+ len -= copy;
+ }
+
+ offset = nskb->data - skb->data;
+
+ nskb->h.raw = skb->h.raw + offset;
+ nskb->nh.raw = skb->nh.raw + offset;
+ nskb->mac.raw = skb->mac.raw + offset;
+
+ return nskb;
+
+ err_free:
+ kfree_skb(nskb);
+ err:
+ return NULL;
+}
+
+static inline int netbk_max_required_rx_slots(netif_t *netif)
+{
+ if (netif->features & (NETIF_F_SG|NETIF_F_TSO))
+ return MAX_SKB_FRAGS + 2; /* header + extra_info + frags */
+ return 1; /* all in one */
+}
+
+static inline int netbk_queue_full(netif_t *netif)
+{
+ RING_IDX peek = netif->rx_req_cons_peek;
+ RING_IDX needed = netbk_max_required_rx_slots(netif);
+
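+ /*
+ * The queue is full if the frontend has not posted enough rx requests
+ * to cover a worst-case packet, or if there is no room left in the
+ * ring for the corresponding responses.
+ */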
+ return ((netif->rx.sring->req_prod - peek) < needed) ||
+ ((netif->rx.rsp_prod_pvt + NET_RX_RING_SIZE - peek) < needed);
+}
+
+static void tx_queue_callback(unsigned long data)
+{
+ netif_t *netif = (netif_t *)data;
+ if (netif_schedulable(netif))
+ netif_wake_queue(netif->dev);
+}
+
+int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+ netif_t *netif = netdev_priv(dev);
+
+ BUG_ON(skb->dev != dev);
+
+ /* Drop the packet if the target domain has no receive buffers. */
+ if (unlikely(!netif_schedulable(netif) || netbk_queue_full(netif)))
+ goto drop;
+
+ /*
+ * Copy the packet here if it's destined for a flipping interface
+ * but isn't flippable (e.g. extra references to data).
+ * XXX For now we also copy skbuffs whose head crosses a page
+ * boundary, because netbk_gop_skb can't handle them.
+ */
+ if (!netif->copying_receiver ||
+ ((skb_headlen(skb) + offset_in_page(skb->data)) >= PAGE_SIZE)) {
+ struct sk_buff *nskb = netbk_copy_skb(skb);
+ if ( unlikely(nskb == NULL) )
+ goto drop;
+ /* Copy only the header fields we use in this driver. */
+ nskb->dev = skb->dev;
+ nskb->ip_summed = skb->ip_summed;
+ nskb->proto_data_valid = skb->proto_data_valid;
+ dev_kfree_skb(skb);
+ skb = nskb;
+ }
+
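+ /*
+ * Reserve rx ring slots for the worst case: one per fragment, one
+ * for the linear head, and one more if a GSO extra_info is needed.
+ */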
+ netif->rx_req_cons_peek += skb_shinfo(skb)->nr_frags + 1 +
+ !!skb_shinfo(skb)->gso_size;
+ netif_get(netif);
+
+ if (netbk_can_queue(dev) && netbk_queue_full(netif)) {
+ netif->rx.sring->req_event = netif->rx_req_cons_peek +
+ netbk_max_required_rx_slots(netif);
+ mb(); /* request notification /then/ check & stop the queue */
+ if (netbk_queue_full(netif)) {
+ netif_stop_queue(dev);
+ /*
+ * Schedule 500ms timeout to restart the queue, thus
+ * ensuring that an inactive queue will be drained.
+ * Packets will be dropped immediately until more
+ * receive buffers become available (see
+ * netbk_queue_full() check above).
+ */
+ netif->tx_queue_timeout.data = (unsigned long)netif;
+ netif->tx_queue_timeout.function = tx_queue_callback;
+ __mod_timer(&netif->tx_queue_timeout, jiffies + HZ/2);
+ }
+ }
+
+ skb_queue_tail(&rx_queue, skb);
+ tasklet_schedule(&net_rx_tasklet);
+
+ return 0;
+
+ drop:
+ netif->stats.tx_dropped++;
+ dev_kfree_skb(skb);
+ return 0;
+}
+
+#if 0
+static void xen_network_done_notify(void)
+{
+ static struct net_device *eth0_dev = NULL;
+ if (unlikely(eth0_dev == NULL))
+ eth0_dev = __dev_get_by_name("eth0");
+ netif_rx_schedule(eth0_dev);
+}
+/*
+ * Add following to poll() function in NAPI driver (Tigon3 is example):
+ * if ( xen_network_done() )
+ * tg3_enable_ints(tp);
+ */
+int xen_network_done(void)
+{
+ return skb_queue_empty(&rx_queue);
+}
+#endif
+
+struct netrx_pending_operations {
+ unsigned trans_prod, trans_cons;
+ unsigned mmu_prod, mmu_mcl;
+ unsigned mcl_prod, mcl_cons;
+ unsigned copy_prod, copy_cons;
+ unsigned meta_prod, meta_cons;
+ mmu_update_t *mmu;
+ gnttab_transfer_t *trans;
+ gnttab_copy_t *copy;
+ multicall_entry_t *mcl;
+ struct netbk_rx_meta *meta;
+};
+
+/* Set up the grant operations for this fragment. If it's a flipping
+ interface, we also set up the unmap request from here. */
+static u16 netbk_gop_frag(netif_t *netif, struct netbk_rx_meta *meta,
+ int i, struct netrx_pending_operations *npo,
+ struct page *page, unsigned long size,
+ unsigned long offset)
+{
+ mmu_update_t *mmu;
+ gnttab_transfer_t *gop;
+ gnttab_copy_t *copy_gop;
+ multicall_entry_t *mcl;
+ netif_rx_request_t *req;
+ unsigned long old_mfn, new_mfn;
+ int idx = netif_page_index(page);
+
+ old_mfn = virt_to_mfn(page_address(page));
+
+ req = RING_GET_REQUEST(&netif->rx, netif->rx.req_cons + i);
+ if (netif->copying_receiver) {
+ /* The fragment needs to be copied rather than
+ flipped. */
+ meta->copy = 1;
+ copy_gop = npo->copy + npo->copy_prod++;
+ copy_gop->flags = GNTCOPY_dest_gref;
+ if (idx > -1) {
+ struct pending_tx_info *src_pend = &pending_tx_info[idx];
+ copy_gop->source.domid = src_pend->netif->domid;
+ copy_gop->source.u.ref = src_pend->req.gref;
+ copy_gop->flags |= GNTCOPY_source_gref;
+ } else {
+ copy_gop->source.domid = DOMID_SELF;
+ copy_gop->source.u.gmfn = old_mfn;
+ }
+ copy_gop->source.offset = offset;
+ copy_gop->dest.domid = netif->domid;
+ copy_gop->dest.offset = 0;
+ copy_gop->dest.u.ref = req->gref;
+ copy_gop->len = size;
+ } else {
+ meta->copy = 0;
+ if (!xen_feature(XENFEAT_auto_translated_physmap)) {
+ new_mfn = alloc_mfn();
+
+ /*
+ * Set the new P2M table entry before
+ * reassigning the old data page. Heed the
+ * comment in pgtable-2level.h:pte_page(). :-)
+ */
+ set_phys_to_machine(page_to_pfn(page), new_mfn);
+
+ mcl = npo->mcl + npo->mcl_prod++;
+ MULTI_update_va_mapping(mcl,
+ (unsigned long)page_address(page),
+ pfn_pte_ma(new_mfn, PAGE_KERNEL),
+ 0);
+
+ mmu = npo->mmu + npo->mmu_prod++;
+ mmu->ptr = ((maddr_t)new_mfn << PAGE_SHIFT) |
+ MMU_MACHPHYS_UPDATE;
+ mmu->val = page_to_pfn(page);
+ }
+
+ gop = npo->trans + npo->trans_prod++;
+ gop->mfn = old_mfn;
+ gop->domid = netif->domid;
+ gop->ref = req->gref;
+ }
+ return req->id;
+}
+
+static void netbk_gop_skb(struct sk_buff *skb,
+ struct netrx_pending_operations *npo)
+{
+ netif_t *netif = netdev_priv(skb->dev);
+ int nr_frags = skb_shinfo(skb)->nr_frags;
+ int i;
+ int extra;
+ struct netbk_rx_meta *head_meta, *meta;
+
+ head_meta = npo->meta + npo->meta_prod++;
+ head_meta->frag.page_offset = skb_shinfo(skb)->gso_type;
+ head_meta->frag.size = skb_shinfo(skb)->gso_size;
+ extra = !!head_meta->frag.size + 1;
+
+ for (i = 0; i < nr_frags; i++) {
+ meta = npo->meta + npo->meta_prod++;
+ meta->frag = skb_shinfo(skb)->frags[i];
+ meta->id = netbk_gop_frag(netif, meta, i + extra, npo,
+ meta->frag.page,
+ meta->frag.size,
+ meta->frag.page_offset);
+ }
+
+ /*
+ * This must occur at the end to ensure that we don't trash skb_shinfo
+ * until we're done. We know that the head doesn't cross a page
+ * boundary because such packets get copied in netif_be_start_xmit.
+ */
+ head_meta->id = netbk_gop_frag(netif, head_meta, 0, npo,
+ virt_to_page(skb->data),
+ skb_headlen(skb),
+ offset_in_page(skb->data));
+
+ netif->rx.req_cons += nr_frags + extra;
+}
+
+static inline void netbk_free_pages(int nr_frags, struct netbk_rx_meta *meta)
+{
+ int i;
+
+ for (i = 0; i < nr_frags; i++)
+ put_page(meta[i].frag.page);
+}
+
+/* This is a twin to netbk_gop_skb. Assume that netbk_gop_skb was
+ used to set up the operations on the top of
+ netrx_pending_operations, which have since been done. Check that
+ they didn't give any errors and advance over them. */
+static int netbk_check_gop(int nr_frags, domid_t domid,
+ struct netrx_pending_operations *npo)
+{
+ multicall_entry_t *mcl;
+ gnttab_transfer_t *gop;
+ gnttab_copy_t *copy_op;
+ int status = NETIF_RSP_OKAY;
+ int i;
+
+ for (i = 0; i <= nr_frags; i++) {
+ if (npo->meta[npo->meta_cons + i].copy) {
+ copy_op = npo->copy + npo->copy_cons++;
+ if (copy_op->status != GNTST_okay) {
+ DPRINTK("Bad status %d from copy to DOM%d.\n",
+ copy_op->status, domid);
+ status = NETIF_RSP_ERROR;
+ }
+ } else {
+ if (!xen_feature(XENFEAT_auto_translated_physmap)) {
+ mcl = npo->mcl + npo->mcl_cons++;
+ /* The update_va_mapping() must not fail. */
+ BUG_ON(mcl->result != 0);
+ }
+
+ gop = npo->trans + npo->trans_cons++;
+ /* Check the reassignment error code. */
+ if (gop->status != 0) {
+ DPRINTK("Bad status %d from grant transfer to DOM%u\n",
+ gop->status, domid);
+ /*
+ * Page no longer belongs to us unless
+ * GNTST_bad_page, but that should be
+ * a fatal error anyway.
+ */
+ BUG_ON(gop->status == GNTST_bad_page);
+ status = NETIF_RSP_ERROR;
+ }
+ }
+ }
+
+ return status;
+}
+
+static void netbk_add_frag_responses(netif_t *netif, int status,
+ struct netbk_rx_meta *meta, int nr_frags)
+{
+ int i;
+ unsigned long offset;
+
+ for (i = 0; i < nr_frags; i++) {
+ int id = meta[i].id;
+ int flags = (i == nr_frags - 1) ? 0 : NETRXF_more_data;
+
+ if (meta[i].copy)
+ offset = 0;
+ else
+ offset = meta[i].frag.page_offset;
+ make_rx_response(netif, id, status, offset,
+ meta[i].frag.size, flags);
+ }
+}
+
+static void net_rx_action(unsigned long unused)
+{
+ netif_t *netif = NULL;
+ s8 status;
+ u16 id, irq, flags;
+ netif_rx_response_t *resp;
+ multicall_entry_t *mcl;
+ struct sk_buff_head rxq;
+ struct sk_buff *skb;
+ int notify_nr = 0;
+ int ret;
+ int nr_frags;
+ int count;
+ unsigned long offset;
+
+ /*
+ * Putting hundreds of bytes on the stack is considered rude.
+ * Static works because a tasklet can only be on one CPU at any time.
+ */
+ static multicall_entry_t rx_mcl[NET_RX_RING_SIZE+3];
+ static mmu_update_t rx_mmu[NET_RX_RING_SIZE];
+ static gnttab_transfer_t grant_trans_op[NET_RX_RING_SIZE];
+ static gnttab_copy_t grant_copy_op[NET_RX_RING_SIZE];
+ static unsigned char rx_notify[NR_IRQS];
+ static u16 notify_list[NET_RX_RING_SIZE];
+ static struct netbk_rx_meta meta[NET_RX_RING_SIZE];
+
+ struct netrx_pending_operations npo = {
+ mmu: rx_mmu,
+ trans: grant_trans_op,
+ copy: grant_copy_op,
+ mcl: rx_mcl,
+ meta: meta};
+
+ skb_queue_head_init(&rxq);
+
+ count = 0;
+
+ while ((skb = skb_dequeue(&rx_queue)) != NULL) {
+ nr_frags = skb_shinfo(skb)->nr_frags;
+ *(int *)skb->cb = nr_frags;
+
+ if (!xen_feature(XENFEAT_auto_translated_physmap) &&
+ !((netif_t *)netdev_priv(skb->dev))->copying_receiver &&
+ check_mfn(nr_frags + 1)) {
+ /* Memory squeeze? Back off for an arbitrary while. */
+ if ( net_ratelimit() )
+ WPRINTK("Memory squeeze in netback "
+ "driver.\n");
+ mod_timer(&net_timer, jiffies + HZ);
+ skb_queue_head(&rx_queue, skb);
+ break;
+ }
+
+ netbk_gop_skb(skb, &npo);
+
+ count += nr_frags + 1;
+
+ __skb_queue_tail(&rxq, skb);
+
+ /* Filled the batch queue? */
+ if (count + MAX_SKB_FRAGS >= NET_RX_RING_SIZE)
+ break;
+ }
+
+ BUG_ON(npo.meta_prod > ARRAY_SIZE(meta));
+
+ npo.mmu_mcl = npo.mcl_prod;
+ if (npo.mcl_prod) {
+ BUG_ON(xen_feature(XENFEAT_auto_translated_physmap));
+ BUG_ON(npo.mmu_prod > ARRAY_SIZE(rx_mmu));
+ mcl = npo.mcl + npo.mcl_prod++;
+
+ BUG_ON(mcl[-1].op != __HYPERVISOR_update_va_mapping);
+ mcl[-1].args[MULTI_UVMFLAGS_INDEX] = UVMF_TLB_FLUSH|UVMF_ALL;
+
+ mcl->op = __HYPERVISOR_mmu_update;
+ mcl->args[0] = (unsigned long)rx_mmu;
+ mcl->args[1] = npo.mmu_prod;
+ mcl->args[2] = 0;
+ mcl->args[3] = DOMID_SELF;
+ }
+
+ if (npo.trans_prod) {
+ BUG_ON(npo.trans_prod > ARRAY_SIZE(grant_trans_op));
+ mcl = npo.mcl + npo.mcl_prod++;
+ mcl->op = __HYPERVISOR_grant_table_op;
+ mcl->args[0] = GNTTABOP_transfer;
+ mcl->args[1] = (unsigned long)grant_trans_op;
+ mcl->args[2] = npo.trans_prod;
+ }
+
+ if (npo.copy_prod) {
+ BUG_ON(npo.copy_prod > ARRAY_SIZE(grant_copy_op));
+ mcl = npo.mcl + npo.mcl_prod++;
+ mcl->op = __HYPERVISOR_grant_table_op;
+ mcl->args[0] = GNTTABOP_copy;
+ mcl->args[1] = (unsigned long)grant_copy_op;
+ mcl->args[2] = npo.copy_prod;
+ }
+
+ /* Nothing to do? */
+ if (!npo.mcl_prod)
+ return;
+
+ BUG_ON(npo.mcl_prod > ARRAY_SIZE(rx_mcl));
+
+ ret = HYPERVISOR_multicall(npo.mcl, npo.mcl_prod);
+ BUG_ON(ret != 0);
+ /* The mmu_machphys_update() must not fail. */
+ BUG_ON(npo.mmu_mcl && npo.mcl[npo.mmu_mcl].result != 0);
+
+ while ((skb = __skb_dequeue(&rxq)) != NULL) {
+ nr_frags = *(int *)skb->cb;
+
+ netif = netdev_priv(skb->dev);
+ /* We can't rely on skb_release_data to release the
+ pages used by fragments for us, since it tries to
+ touch the pages in the fraglist. If we're in
+ flipping mode, that doesn't work. In copying mode,
+ we still have access to all of the pages, and so
+ it's safe to let release_data deal with it. */
+ /* (Freeing the fragments is safe since we copy
+ non-linear skbs destined for flipping interfaces) */
+ if (!netif->copying_receiver) {
+ atomic_set(&(skb_shinfo(skb)->dataref), 1);
+ skb_shinfo(skb)->frag_list = NULL;
+ skb_shinfo(skb)->nr_frags = 0;
+ netbk_free_pages(nr_frags, meta + npo.meta_cons + 1);
+ }
+
+ netif->stats.tx_bytes += skb->len;
+ netif->stats.tx_packets++;
+
+ status = netbk_check_gop(nr_frags, netif->domid, &npo);
+
+ id = meta[npo.meta_cons].id;
+ flags = nr_frags ? NETRXF_more_data : 0;
+
+ if (skb->ip_summed == CHECKSUM_HW) /* local packet? */
+ flags |= NETRXF_csum_blank | NETRXF_data_validated;
+ else if (skb->proto_data_valid) /* remote but checksummed? */
+ flags |= NETRXF_data_validated;
+
+ if (meta[npo.meta_cons].copy)
+ offset = 0;
+ else
+ offset = offset_in_page(skb->data);
+ resp = make_rx_response(netif, id, status, offset,
+ skb_headlen(skb), flags);
+
+ if (meta[npo.meta_cons].frag.size) {
+ struct netif_extra_info *gso =
+ (struct netif_extra_info *)
+ RING_GET_RESPONSE(&netif->rx,
+ netif->rx.rsp_prod_pvt++);
+
+ resp->flags |= NETRXF_extra_info;
+
+ gso->u.gso.size = meta[npo.meta_cons].frag.size;
+ gso->u.gso.type = XEN_NETIF_GSO_TYPE_TCPV4;
+ gso->u.gso.pad = 0;
+ gso->u.gso.features = 0;
+
+ gso->type = XEN_NETIF_EXTRA_TYPE_GSO;
+ gso->flags = 0;
+ }
+
+ netbk_add_frag_responses(netif, status,
+ meta + npo.meta_cons + 1,
+ nr_frags);
+
+ RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&netif->rx, ret);
+ irq = netif->irq;
+ if (ret && !rx_notify[irq]) {
+ rx_notify[irq] = 1;
+ notify_list[notify_nr++] = irq;
+ }
+
+ if (netif_queue_stopped(netif->dev) &&
+ netif_schedulable(netif) &&
+ !netbk_queue_full(netif))
+ netif_wake_queue(netif->dev);
+
+ netif_put(netif);
+ dev_kfree_skb(skb);
+ npo.meta_cons += nr_frags + 1;
+ }
+
+ while (notify_nr != 0) {
+ irq = notify_list[--notify_nr];
+ rx_notify[irq] = 0;
+ notify_remote_via_irq(irq);
+ }
+
+ /* More work to do? */
+ if (!skb_queue_empty(&rx_queue) && !timer_pending(&net_timer))
+ tasklet_schedule(&net_rx_tasklet);
+#if 0
+ else
+ xen_network_done_notify();
+#endif
+}
+
+static void net_alarm(unsigned long unused)
+{
+ tasklet_schedule(&net_rx_tasklet);
+}
+
+static void netbk_tx_pending_timeout(unsigned long unused)
+{
+ tasklet_schedule(&net_tx_tasklet);
+}
+
+struct net_device_stats *netif_be_get_stats(struct net_device *dev)
+{
+ netif_t *netif = netdev_priv(dev);
+ return &netif->stats;
+}
+
+static int __on_net_schedule_list(netif_t *netif)
+{
+ return netif->list.next != NULL;
+}
+
+static void remove_from_net_schedule_list(netif_t *netif)
+{
+ spin_lock_irq(&net_schedule_list_lock);
+ if (likely(__on_net_schedule_list(netif))) {
+ list_del(&netif->list);
+ netif->list.next = NULL;
+ netif_put(netif);
+ }
+ spin_unlock_irq(&net_schedule_list_lock);
+}
+
+static void add_to_net_schedule_list_tail(netif_t *netif)
+{
+ if (__on_net_schedule_list(netif))
+ return;
+
+ spin_lock_irq(&net_schedule_list_lock);
+ if (!__on_net_schedule_list(netif) &&
+ likely(netif_schedulable(netif))) {
+ list_add_tail(&netif->list, &net_schedule_list);
+ netif_get(netif);
+ }
+ spin_unlock_irq(&net_schedule_list_lock);
+}
+
+/*
+ * Note on CONFIG_XEN_NETDEV_PIPELINED_TRANSMITTER:
+ * If this driver is pipelining transmit requests then we can be very
+ * aggressive in avoiding new-packet notifications -- frontend only needs to
+ * send a notification if there are no outstanding unreceived responses.
+ * If we may be buffering transmit requests for any reason then we must be rather
+ * more conservative and treat this as the final check for pending work.
+ */
+void netif_schedule_work(netif_t *netif)
+{
+ int more_to_do;
+
+#ifdef CONFIG_XEN_NETDEV_PIPELINED_TRANSMITTER
+ more_to_do = RING_HAS_UNCONSUMED_REQUESTS(&netif->tx);
+#else
+ RING_FINAL_CHECK_FOR_REQUESTS(&netif->tx, more_to_do);
+#endif
+
+ if (more_to_do) {
+ add_to_net_schedule_list_tail(netif);
+ maybe_schedule_tx_action();
+ }
+}
+
+void netif_deschedule_work(netif_t *netif)
+{
+ remove_from_net_schedule_list(netif);
+}
+
+
+static void tx_add_credit(netif_t *netif)
+{
+ unsigned long max_burst, max_credit;
+
+ /*
+ * Allow a burst big enough to transmit a jumbo packet of up to 128kB.
+ * Otherwise the interface can seize up due to insufficient credit.
+ */
+ max_burst = RING_GET_REQUEST(&netif->tx, netif->tx.req_cons)->size;
+ max_burst = min(max_burst, 131072UL);
+ max_burst = max(max_burst, netif->credit_bytes);
+
+ /* Take care that adding a new chunk of credit doesn't wrap to zero. */
+ max_credit = netif->remaining_credit + netif->credit_bytes;
+ if (max_credit < netif->remaining_credit)
+ max_credit = ULONG_MAX; /* wrapped: clamp to ULONG_MAX */
+
+ netif->remaining_credit = min(max_credit, max_burst);
+}
+
+static void tx_credit_callback(unsigned long data)
+{
+ netif_t *netif = (netif_t *)data;
+ tx_add_credit(netif);
+ netif_schedule_work(netif);
+}
+
+static inline int copy_pending_req(PEND_RING_IDX pending_idx)
+{
+ return gnttab_copy_grant_page(grant_tx_handle[pending_idx],
+ &mmap_pages[pending_idx]);
+}
+
+inline static void net_tx_action_dealloc(void)
+{
+ struct netbk_tx_pending_inuse *inuse, *n;
+ gnttab_unmap_grant_ref_t *gop;
+ u16 pending_idx;
+ PEND_RING_IDX dc, dp;
+ netif_t *netif;
+ int ret;
+ LIST_HEAD(list);
+
+ dc = dealloc_cons;
+ gop = tx_unmap_ops;
+
+ /*
+ * Free up any grants we have finished using
+ */
+ do {
+ dp = dealloc_prod;
+
+ /* Ensure we see all indices enqueued by netif_idx_release(). */
+ smp_rmb();
+
+ while (dc != dp) {
+ unsigned long pfn;
+
+ pending_idx = dealloc_ring[MASK_PEND_IDX(dc++)];
+ list_move_tail(&pending_inuse[pending_idx].list, &list);
+
+ pfn = idx_to_pfn(pending_idx);
+ /* Already unmapped? */
+ if (!phys_to_machine_mapping_valid(pfn))
+ continue;
+
+ gnttab_set_unmap_op(gop, idx_to_kaddr(pending_idx),
+ GNTMAP_host_map,
+ grant_tx_handle[pending_idx]);
+ gop++;
+ }
+
+ if (netbk_copy_skb_mode != NETBK_DELAYED_COPY_SKB ||
+ list_empty(&pending_inuse_head))
+ break;
+
+ /* Copy any entries that have been pending for too long. */
+ list_for_each_entry_safe(inuse, n, &pending_inuse_head, list) {
+ if (time_after(inuse->alloc_time + HZ / 2, jiffies))
+ break;
+
+ switch (copy_pending_req(inuse - pending_inuse)) {
+ case 0:
+ list_move_tail(&inuse->list, &list);
+ continue;
+ case -EBUSY:
+ list_del_init(&inuse->list);
+ continue;
+ case -ENOENT:
+ continue;
+ }
+
+ break;
+ }
+ } while (dp != dealloc_prod);
+
+ dealloc_cons = dc;
+
+ ret = HYPERVISOR_grant_table_op(
+ GNTTABOP_unmap_grant_ref, tx_unmap_ops, gop - tx_unmap_ops);
+ BUG_ON(ret);
+
+ list_for_each_entry_safe(inuse, n, &list, list) {
+ pending_idx = inuse - pending_inuse;
+
+ netif = pending_tx_info[pending_idx].netif;
+
+ make_tx_response(netif, &pending_tx_info[pending_idx].req,
+ NETIF_RSP_OKAY);
+
+ /* Ready for next use. */
+ gnttab_reset_grant_page(mmap_pages[pending_idx]);
+
+ pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx;
+
+ netif_put(netif);
+
+ list_del_init(&inuse->list);
+ }
+}
+
+static void netbk_tx_err(netif_t *netif, netif_tx_request_t *txp, RING_IDX end)
+{
+ RING_IDX cons = netif->tx.req_cons;
+
+ do {
+ make_tx_response(netif, txp, NETIF_RSP_ERROR);
+ if (cons >= end)
+ break;
+ txp = RING_GET_REQUEST(&netif->tx, cons++);
+ } while (1);
+ netif->tx.req_cons = cons;
+ netif_schedule_work(netif);
+ netif_put(netif);
+}
+
+static int netbk_count_requests(netif_t *netif, netif_tx_request_t *first,
+ netif_tx_request_t *txp, int work_to_do)
+{
+ RING_IDX cons = netif->tx.req_cons;
+ int frags = 0;
+
+ if (!(first->flags & NETTXF_more_data))
+ return 0;
+
+ do {
+ if (frags >= work_to_do) {
+ DPRINTK("Need more frags\n");
+ return -frags;
+ }
+
+ if (unlikely(frags >= MAX_SKB_FRAGS)) {
+ DPRINTK("Too many frags\n");
+ return -frags;
+ }
+
+ memcpy(txp, RING_GET_REQUEST(&netif->tx, cons + frags),
+ sizeof(*txp));
+ if (txp->size > first->size) {
+ DPRINTK("Frags galore\n");
+ return -frags;
+ }
+
+ first->size -= txp->size;
+ frags++;
+
+ if (unlikely((txp->offset + txp->size) > PAGE_SIZE)) {
+ DPRINTK("txp->offset: %x, size: %u\n",
+ txp->offset, txp->size);
+ return -frags;
+ }
+ } while ((txp++)->flags & NETTXF_more_data);
+
+ return frags;
+}
+
+static gnttab_map_grant_ref_t *netbk_get_requests(netif_t *netif,
+ struct sk_buff *skb,
+ netif_tx_request_t *txp,
+ gnttab_map_grant_ref_t *mop)
+{
+ struct skb_shared_info *shinfo = skb_shinfo(skb);
+ skb_frag_t *frags = shinfo->frags;
+ unsigned long pending_idx = *((u16 *)skb->data);
+ int i, start;
+
+ /* Skip the first skb fragment if it is on the same page as the header fragment. */
+ start = ((unsigned long)shinfo->frags[0].page == pending_idx);
+
+ for (i = start; i < shinfo->nr_frags; i++, txp++) {
+ pending_idx = pending_ring[MASK_PEND_IDX(pending_cons++)];
+
+ gnttab_set_map_op(mop++, idx_to_kaddr(pending_idx),
+ GNTMAP_host_map | GNTMAP_readonly,
+ txp->gref, netif->domid);
+
+ memcpy(&pending_tx_info[pending_idx].req, txp, sizeof(*txp));
+ netif_get(netif);
+ pending_tx_info[pending_idx].netif = netif;
+ frags[i].page = (void *)pending_idx;
+ }
+
+ return mop;
+}
+
+static int netbk_tx_check_mop(struct sk_buff *skb,
+ gnttab_map_grant_ref_t **mopp)
+{
+ gnttab_map_grant_ref_t *mop = *mopp;
+ int pending_idx = *((u16 *)skb->data);
+ netif_t *netif = pending_tx_info[pending_idx].netif;
+ netif_tx_request_t *txp;
+ struct skb_shared_info *shinfo = skb_shinfo(skb);
+ int nr_frags = shinfo->nr_frags;
+ int i, err, start;
+
+ /* Check status of header. */
+ err = mop->status;
+ if (unlikely(err)) {
+ txp = &pending_tx_info[pending_idx].req;
+ make_tx_response(netif, txp, NETIF_RSP_ERROR);
+ pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx;
+ netif_put(netif);
+ } else {
+ set_phys_to_machine(
+ __pa(idx_to_kaddr(pending_idx)) >> PAGE_SHIFT,
+ FOREIGN_FRAME(mop->dev_bus_addr >> PAGE_SHIFT));
+ grant_tx_handle[pending_idx] = mop->handle;
+ }
+
+ /* Skip the first skb fragment if it is on the same page as the header fragment. */
+ start = ((unsigned long)shinfo->frags[0].page == pending_idx);
+
+ for (i = start; i < nr_frags; i++) {
+ int j, newerr;
+
+ pending_idx = (unsigned long)shinfo->frags[i].page;
+
+ /* Check error status: if okay then remember grant handle. */
+ newerr = (++mop)->status;
+ if (likely(!newerr)) {
+ set_phys_to_machine(
+ __pa(idx_to_kaddr(pending_idx))>>PAGE_SHIFT,
+ FOREIGN_FRAME(mop->dev_bus_addr>>PAGE_SHIFT));
+ grant_tx_handle[pending_idx] = mop->handle;
+ /* Had a previous error? Invalidate this fragment. */
+ if (unlikely(err))
+ netif_idx_release(pending_idx);
+ continue;
+ }
+
+ /* Error on this fragment: respond to client with an error. */
+ txp = &pending_tx_info[pending_idx].req;
+ make_tx_response(netif, txp, NETIF_RSP_ERROR);
+ pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx;
+ netif_put(netif);
+
+ /* Not the first error? Preceding frags already invalidated. */
+ if (err)
+ continue;
+
+ /* First error: invalidate header and preceding fragments. */
+ pending_idx = *((u16 *)skb->data);
+ netif_idx_release(pending_idx);
+ for (j = start; j < i; j++) {
+ pending_idx = (unsigned long)shinfo->frags[j].page;
+ netif_idx_release(pending_idx);
+ }
+
+ /* Remember the error: invalidate all subsequent fragments. */
+ err = newerr;
+ }
+
+ *mopp = mop + 1;
+ return err;
+}
+
+static void netbk_fill_frags(struct sk_buff *skb)
+{
+ struct skb_shared_info *shinfo = skb_shinfo(skb);
+ int nr_frags = shinfo->nr_frags;
+ int i;
+
+ for (i = 0; i < nr_frags; i++) {
+ skb_frag_t *frag = shinfo->frags + i;
+ netif_tx_request_t *txp;
+ unsigned long pending_idx;
+
+ pending_idx = (unsigned long)frag->page;
+
+ pending_inuse[pending_idx].alloc_time = jiffies;
+ list_add_tail(&pending_inuse[pending_idx].list,
+ &pending_inuse_head);
+
+ txp = &pending_tx_info[pending_idx].req;
+ frag->page = virt_to_page(idx_to_kaddr(pending_idx));
+ frag->size = txp->size;
+ frag->page_offset = txp->offset;
+
+ skb->len += txp->size;
+ skb->data_len += txp->size;
+ skb->truesize += txp->size;
+ }
+}
+
+int netbk_get_extras(netif_t *netif, struct netif_extra_info *extras,
+ int work_to_do)
+{
+ struct netif_extra_info extra;
+ RING_IDX cons = netif->tx.req_cons;
+
+ do {
+ if (unlikely(work_to_do-- <= 0)) {
+ DPRINTK("Missing extra info\n");
+ return -EBADR;
+ }
+
+ memcpy(&extra, RING_GET_REQUEST(&netif->tx, cons),
+ sizeof(extra));
+ if (unlikely(!extra.type ||
+ extra.type >= XEN_NETIF_EXTRA_TYPE_MAX)) {
+ netif->tx.req_cons = ++cons;
+ DPRINTK("Invalid extra type: %d\n", extra.type);
+ return -EINVAL;
+ }
+
+ memcpy(&extras[extra.type - 1], &extra, sizeof(extra));
+ netif->tx.req_cons = ++cons;
+ } while (extra.flags & XEN_NETIF_EXTRA_FLAG_MORE);
+
+ return work_to_do;
+}
+
+static int netbk_set_skb_gso(struct sk_buff *skb, struct netif_extra_info *gso)
+{
+ if (!gso->u.gso.size) {
+ DPRINTK("GSO size must not be zero.\n");
+ return -EINVAL;
+ }
+
+ /* Currently only TCPv4 S.O. is supported. */
+ if (gso->u.gso.type != XEN_NETIF_GSO_TYPE_TCPV4) {
+ DPRINTK("Bad GSO type %d.\n", gso->u.gso.type);
+ return -EINVAL;
+ }
+
+ skb_shinfo(skb)->gso_size = gso->u.gso.size;
+ skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
+
+ /* Header must be checked, and gso_segs computed. */
+ skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
+ skb_shinfo(skb)->gso_segs = 0;
+
+ return 0;
+}
+
+/* Called after netfront has transmitted */
+static void net_tx_action(unsigned long unused)
+{
+ struct list_head *ent;
+ struct sk_buff *skb;
+ netif_t *netif;
+ netif_tx_request_t txreq;
+ netif_tx_request_t txfrags[MAX_SKB_FRAGS];
+ struct netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX - 1];
+ u16 pending_idx;
+ RING_IDX i;
+ gnttab_map_grant_ref_t *mop;
+ unsigned int data_len;
+ int ret, work_to_do;
+
+ if (dealloc_cons != dealloc_prod)
+ net_tx_action_dealloc();
+
+ mop = tx_map_ops;
+ while (((NR_PENDING_REQS + MAX_SKB_FRAGS) < MAX_PENDING_REQS) &&
+ !list_empty(&net_schedule_list)) {
+ /* Get a netif from the list with work to do. */
+ ent = net_schedule_list.next;
+ netif = list_entry(ent, netif_t, list);
+ netif_get(netif);
+ remove_from_net_schedule_list(netif);
+
+ RING_FINAL_CHECK_FOR_REQUESTS(&netif->tx, work_to_do);
+ if (!work_to_do) {
+ netif_put(netif);
+ continue;
+ }
+
+ i = netif->tx.req_cons;
+ rmb(); /* Ensure that we see the request before we copy it. */
+ memcpy(&txreq, RING_GET_REQUEST(&netif->tx, i), sizeof(txreq));
+
+ /* Credit-based scheduling. */
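+ /*
+ * Each interface may send up to credit_bytes every credit_usec
+ * interval.  If this request exceeds the remaining credit, defer
+ * it until tx_credit_callback() replenishes the allowance.
+ */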
+ if (txreq.size > netif->remaining_credit) {
+ unsigned long now = jiffies;
+ unsigned long next_credit =
+ netif->credit_timeout.expires +
+ msecs_to_jiffies(netif->credit_usec / 1000);
+
+ /* Timer could already be pending in rare cases. */
+ if (timer_pending(&netif->credit_timeout)) {
+ netif_put(netif);
+ continue;
+ }
+
+ /* Passed the point where we can replenish credit? */
+ if (time_after_eq(now, next_credit)) {
+ netif->credit_timeout.expires = now;
+ tx_add_credit(netif);
+ }
+
+ /* Still too big to send right now? Set a callback. */
+ if (txreq.size > netif->remaining_credit) {
+ netif->credit_timeout.data =
+ (unsigned long)netif;
+ netif->credit_timeout.function =
+ tx_credit_callback;
+ __mod_timer(&netif->credit_timeout,
+ next_credit);
+ netif_put(netif);
+ continue;
+ }
+ }
+ netif->remaining_credit -= txreq.size;
+
+ work_to_do--;
+ netif->tx.req_cons = ++i;
+
+ memset(extras, 0, sizeof(extras));
+ if (txreq.flags & NETTXF_extra_info) {
+ work_to_do = netbk_get_extras(netif, extras,
+ work_to_do);
+ i = netif->tx.req_cons;
+ if (unlikely(work_to_do < 0)) {
+ netbk_tx_err(netif, &txreq, i);
+ continue;
+ }
+ }
+
+ ret = netbk_count_requests(netif, &txreq, txfrags, work_to_do);
+ if (unlikely(ret < 0)) {
+ netbk_tx_err(netif, &txreq, i - ret);
+ continue;
+ }
+ i += ret;
+
+ if (unlikely(txreq.size < ETH_HLEN)) {
+ DPRINTK("Bad packet size: %d\n", txreq.size);
+ netbk_tx_err(netif, &txreq, i);
+ continue;
+ }
+
+ /* The payload must not cross a page boundary, as it cannot be fragmented. */
+ if (unlikely((txreq.offset + txreq.size) > PAGE_SIZE)) {
+ DPRINTK("txreq.offset: %x, size: %u, end: %lu\n",
+ txreq.offset, txreq.size,
+ (txreq.offset &~PAGE_MASK) + txreq.size);
+ netbk_tx_err(netif, &txreq, i);
+ continue;
+ }
+
+ pending_idx = pending_ring[MASK_PEND_IDX(pending_cons)];
+
+ data_len = (txreq.size > PKT_PROT_LEN &&
+ ret < MAX_SKB_FRAGS) ?
+ PKT_PROT_LEN : txreq.size;
+
+ skb = alloc_skb(data_len + 16 + NET_IP_ALIGN,
+ GFP_ATOMIC | __GFP_NOWARN);
+ if (unlikely(skb == NULL)) {
+ DPRINTK("Can't allocate a skb in start_xmit.\n");
+ netbk_tx_err(netif, &txreq, i);
+ break;
+ }
+
+ /* Packets passed to netif_rx() must have some headroom. */
+ skb_reserve(skb, 16 + NET_IP_ALIGN);
+
+ if (extras[XEN_NETIF_EXTRA_TYPE_GSO - 1].type) {
+ struct netif_extra_info *gso;
+ gso = &extras[XEN_NETIF_EXTRA_TYPE_GSO - 1];
+
+ if (netbk_set_skb_gso(skb, gso)) {
+ kfree_skb(skb);
+ netbk_tx_err(netif, &txreq, i);
+ continue;
+ }
+ }
+
+ gnttab_set_map_op(mop, idx_to_kaddr(pending_idx),
+ GNTMAP_host_map | GNTMAP_readonly,
+ txreq.gref, netif->domid);
+ mop++;
+
+ memcpy(&pending_tx_info[pending_idx].req,
+ &txreq, sizeof(txreq));
+ pending_tx_info[pending_idx].netif = netif;
+ *((u16 *)skb->data) = pending_idx;
+
+ __skb_put(skb, data_len);
+
+ skb_shinfo(skb)->nr_frags = ret;
+ if (data_len < txreq.size) {
+ skb_shinfo(skb)->nr_frags++;
+ skb_shinfo(skb)->frags[0].page =
+ (void *)(unsigned long)pending_idx;
+ } else {
+ /* Discriminate from any valid pending_idx value. */
+ skb_shinfo(skb)->frags[0].page = (void *)~0UL;
+ }
+
+ if (skb->data_len < skb_shinfo(skb)->gso_size) {
+ skb_shinfo(skb)->gso_size = 0;
+ skb_shinfo(skb)->gso_type = 0;
+ }
+
+ __skb_queue_tail(&tx_queue, skb);
+
+ pending_cons++;
+
+ mop = netbk_get_requests(netif, skb, txfrags, mop);
+
+ netif->tx.req_cons = i;
+ netif_schedule_work(netif);
+
+ if ((mop - tx_map_ops) >= ARRAY_SIZE(tx_map_ops))
+ break;
+ }
+
+ if (mop == tx_map_ops)
+ return;
+
+ ret = HYPERVISOR_grant_table_op(
+ GNTTABOP_map_grant_ref, tx_map_ops, mop - tx_map_ops);
+ BUG_ON(ret);
+
+ mop = tx_map_ops;
+ while ((skb = __skb_dequeue(&tx_queue)) != NULL) {
+ netif_tx_request_t *txp;
+
+ pending_idx = *((u16 *)skb->data);
+ netif = pending_tx_info[pending_idx].netif;
+ txp = &pending_tx_info[pending_idx].req;
+
+ /* Check the remap error code. */
+ if (unlikely(netbk_tx_check_mop(skb, &mop))) {
+ DPRINTK("netback grant failed.\n");
+ skb_shinfo(skb)->nr_frags = 0;
+ kfree_skb(skb);
+ continue;
+ }
+
+ data_len = skb->len;
+ memcpy(skb->data,
+ (void *)(idx_to_kaddr(pending_idx)|txp->offset),
+ data_len);
+ if (data_len < txp->size) {
+ /* Append the packet payload as a fragment. */
+ txp->offset += data_len;
+ txp->size -= data_len;
+ } else {
+ /* Schedule a response immediately. */
+ netif_idx_release(pending_idx);
+ }
+
+ /*
+ * Old frontends do not assert data_validated but we
+ * can infer it from csum_blank so test both flags.
+ */
+ if (txp->flags & (NETTXF_data_validated|NETTXF_csum_blank)) {
+ skb->ip_summed = CHECKSUM_UNNECESSARY;
+ skb->proto_data_valid = 1;
+ } else {
+ skb->ip_summed = CHECKSUM_NONE;
+ skb->proto_data_valid = 0;
+ }
+ skb->proto_csum_blank = !!(txp->flags & NETTXF_csum_blank);
+
+ netbk_fill_frags(skb);
+
+ skb->dev = netif->dev;
+ skb->protocol = eth_type_trans(skb, skb->dev);
+
+ netif->stats.rx_bytes += skb->len;
+ netif->stats.rx_packets++;
+
+ if (unlikely(netbk_copy_skb_mode == NETBK_ALWAYS_COPY_SKB) &&
+ unlikely(skb_linearize(skb))) {
+ DPRINTK("Can't linearize skb in net_tx_action.\n");
+ kfree_skb(skb);
+ continue;
+ }
+
+ netif_rx(skb);
+ netif->dev->last_rx = jiffies;
+ }
+
+ if (netbk_copy_skb_mode == NETBK_DELAYED_COPY_SKB &&
+ !list_empty(&pending_inuse_head)) {
+ struct netbk_tx_pending_inuse *oldest;
+
+ oldest = list_entry(pending_inuse_head.next,
+ struct netbk_tx_pending_inuse, list);
+ mod_timer(&netbk_tx_pending_timer, oldest->alloc_time + HZ);
+ }
+}
+
+static void netif_idx_release(u16 pending_idx)
+{
+ static DEFINE_SPINLOCK(_lock);
+ unsigned long flags;
+
+ spin_lock_irqsave(&_lock, flags);
+ dealloc_ring[MASK_PEND_IDX(dealloc_prod)] = pending_idx;
+ /* Sync with net_tx_action_dealloc: insert idx /then/ incr producer. */
+ smp_wmb();
+ dealloc_prod++;
+ spin_unlock_irqrestore(&_lock, flags);
+
+ tasklet_schedule(&net_tx_tasklet);
+}
+
+static void netif_page_release(struct page *page, unsigned int order)
+{
+ int idx = netif_page_index(page);
+ BUG_ON(order);
+ BUG_ON(idx < 0);
+ netif_idx_release(idx);
+}
+
+irqreturn_t netif_be_int(int irq, void *dev_id, struct pt_regs *regs)
+{
+ netif_t *netif = dev_id;
+
+ add_to_net_schedule_list_tail(netif);
+ maybe_schedule_tx_action();
+
+ if (netif_schedulable(netif) && !netbk_queue_full(netif))
+ netif_wake_queue(netif->dev);
+
+ return IRQ_HANDLED;
+}
+
+static void make_tx_response(netif_t *netif,
+ netif_tx_request_t *txp,
+ s8 st)
+{
+ RING_IDX i = netif->tx.rsp_prod_pvt;
+ netif_tx_response_t *resp;
+ int notify;
+
+ resp = RING_GET_RESPONSE(&netif->tx, i);
+ resp->id = txp->id;
+ resp->status = st;
+
+ if (txp->flags & NETTXF_extra_info)
+ RING_GET_RESPONSE(&netif->tx, ++i)->status = NETIF_RSP_NULL;
+
+ netif->tx.rsp_prod_pvt = ++i;
+ RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&netif->tx, notify);
+ if (notify)
+ notify_remote_via_irq(netif->irq);
+
+#ifdef CONFIG_XEN_NETDEV_PIPELINED_TRANSMITTER
+ if (i == netif->tx.req_cons) {
+ int more_to_do;
+ RING_FINAL_CHECK_FOR_REQUESTS(&netif->tx, more_to_do);
+ if (more_to_do)
+ add_to_net_schedule_list_tail(netif);
+ }
+#endif
+}
+
+static netif_rx_response_t *make_rx_response(netif_t *netif,
+ u16 id,
+ s8 st,
+ u16 offset,
+ u16 size,
+ u16 flags)
+{
+ RING_IDX i = netif->rx.rsp_prod_pvt;
+ netif_rx_response_t *resp;
+
+ resp = RING_GET_RESPONSE(&netif->rx, i);
+ resp->offset = offset;
+ resp->flags = flags;
+ resp->id = id;
+ resp->status = (s16)size;
+ if (st < 0)
+ resp->status = (s16)st;
+
+ netif->rx.rsp_prod_pvt = ++i;
+
+ return resp;
+}
+
+#ifdef NETBE_DEBUG_INTERRUPT
+static irqreturn_t netif_be_dbg(int irq, void *dev_id, struct pt_regs *regs)
+{
+ struct list_head *ent;
+ netif_t *netif;
+ int i = 0;
+
+ printk(KERN_ALERT "netif_schedule_list:\n");
+ spin_lock_irq(&net_schedule_list_lock);
+
+ list_for_each (ent, &net_schedule_list) {
+ netif = list_entry(ent, netif_t, list);
+ printk(KERN_ALERT " %d: private(rx_req_cons=%08x "
+ "rx_resp_prod=%08x\n",
+ i, netif->rx.req_cons, netif->rx.rsp_prod_pvt);
+ printk(KERN_ALERT " tx_req_cons=%08x tx_resp_prod=%08x)\n",
+ netif->tx.req_cons, netif->tx.rsp_prod_pvt);
+ printk(KERN_ALERT " shared(rx_req_prod=%08x "
+ "rx_resp_prod=%08x\n",
+ netif->rx.sring->req_prod, netif->rx.sring->rsp_prod);
+ printk(KERN_ALERT " rx_event=%08x tx_req_prod=%08x\n",
+ netif->rx.sring->rsp_event, netif->tx.sring->req_prod);
+ printk(KERN_ALERT " tx_resp_prod=%08x, tx_event=%08x)\n",
+ netif->tx.sring->rsp_prod, netif->tx.sring->rsp_event);
+ i++;
+ }
+
+ spin_unlock_irq(&net_schedule_list_lock);
+ printk(KERN_ALERT " ** End of netif_schedule_list **\n");
+
+ return IRQ_HANDLED;
+}
+#endif
+
+static int __init netback_init(void)
+{
+ int i;
+ struct page *page;
+
+ if (!is_running_on_xen())
+ return -ENODEV;
+
+ /* We can increase reservation by this much in net_rx_action(). */
+ balloon_update_driver_allowance(NET_RX_RING_SIZE);
+
+ skb_queue_head_init(&rx_queue);
+ skb_queue_head_init(&tx_queue);
+
+ init_timer(&net_timer);
+ net_timer.data = 0;
+ net_timer.function = net_alarm;
+
+ init_timer(&netbk_tx_pending_timer);
+ netbk_tx_pending_timer.data = 0;
+ netbk_tx_pending_timer.function = netbk_tx_pending_timeout;
+
+ mmap_pages = alloc_empty_pages_and_pagevec(MAX_PENDING_REQS);
+ if (mmap_pages == NULL) {
+ printk("%s: out of memory\n", __FUNCTION__);
+ return -ENOMEM;
+ }
+
+ for (i = 0; i < MAX_PENDING_REQS; i++) {
+ page = mmap_pages[i];
+ SetPageForeign(page, netif_page_release);
+ netif_set_page_index(page, i);
+ INIT_LIST_HEAD(&pending_inuse[i].list);
+ }
+
+ pending_cons = 0;
+ pending_prod = MAX_PENDING_REQS;
+ for (i = 0; i < MAX_PENDING_REQS; i++)
+ pending_ring[i] = i;
+
+ spin_lock_init(&net_schedule_list_lock);
+ INIT_LIST_HEAD(&net_schedule_list);
+
+ netbk_copy_skb_mode = NETBK_DONT_COPY_SKB;
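+ /*
+ * Delayed copying relies on GNTTABOP_unmap_and_replace; if the
+ * hypervisor does not support that operation, fall back to
+ * copying (linearizing) every skb received from the frontend.
+ */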
+ if (MODPARM_copy_skb) {
+ if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_and_replace,
+ NULL, 0))
+ netbk_copy_skb_mode = NETBK_ALWAYS_COPY_SKB;
+ else
+ netbk_copy_skb_mode = NETBK_DELAYED_COPY_SKB;
+ }
+
+ netif_accel_init();
+
+ netif_xenbus_init();
+
+#ifdef NETBE_DEBUG_INTERRUPT
+ (void)bind_virq_to_irqhandler(VIRQ_DEBUG,
+ 0,
+ netif_be_dbg,
+ SA_SHIRQ,
+ "net-be-dbg",
+ &netif_be_dbg);
+#endif
+
+ return 0;
+}
+
+module_init(netback_init);
+
+MODULE_LICENSE("Dual BSD/GPL");
diff --git a/drivers/xen/netback/xenbus.c b/drivers/xen/netback/xenbus.c
new file mode 100644
index 0000000..d7faeb6
--- /dev/null
+++ b/drivers/xen/netback/xenbus.c
@@ -0,0 +1,454 @@
+/* Xenbus code for netif backend
+ Copyright (C) 2005 Rusty Russell <rusty@rustcorp.com.au>
+ Copyright (C) 2005 XenSource Ltd
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+*/
+
+#include <stdarg.h>
+#include <linux/module.h>
+#include <xen/xenbus.h>
+#include "common.h"
+
+#if 0
+#undef DPRINTK
+#define DPRINTK(fmt, args...) \
+ printk("netback/xenbus (%s:%d) " fmt ".\n", __FUNCTION__, __LINE__, ##args)
+#endif
+
+
+static int connect_rings(struct backend_info *);
+static void connect(struct backend_info *);
+static void backend_create_netif(struct backend_info *be);
+
+static int netback_remove(struct xenbus_device *dev)
+{
+ struct backend_info *be = dev->dev.driver_data;
+
+ netback_remove_accelerators(be, dev);
+
+ if (be->netif) {
+ kobject_uevent(&dev->dev.kobj, KOBJ_OFFLINE);
+ netif_disconnect(be->netif);
+ be->netif = NULL;
+ }
+ kfree(be);
+ dev->dev.driver_data = NULL;
+ return 0;
+}
+
+
+/**
+ * Entry point to this code when a new device is created. Allocate the basic
+ * structures and switch to InitWait.
+ */
+static int netback_probe(struct xenbus_device *dev,
+ const struct xenbus_device_id *id)
+{
+ const char *message;
+ struct xenbus_transaction xbt;
+ int err;
+ int sg;
+ struct backend_info *be = kzalloc(sizeof(struct backend_info),
+ GFP_KERNEL);
+ if (!be) {
+ xenbus_dev_fatal(dev, -ENOMEM,
+ "allocating backend structure");
+ return -ENOMEM;
+ }
+
+ be->dev = dev;
+ dev->dev.driver_data = be;
+
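+ /*
+ * Advertise scatter-gather and GSO support only when we will not
+ * have to linearize every skb anyway.
+ */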
+ sg = 1;
+ if (netbk_copy_skb_mode == NETBK_ALWAYS_COPY_SKB)
+ sg = 0;
+
+ do {
+ err = xenbus_transaction_start(&xbt);
+ if (err) {
+ xenbus_dev_fatal(dev, err, "starting transaction");
+ goto fail;
+ }
+
+ err = xenbus_printf(xbt, dev->nodename, "feature-sg", "%d", sg);
+ if (err) {
+ message = "writing feature-sg";
+ goto abort_transaction;
+ }
+
+ err = xenbus_printf(xbt, dev->nodename, "feature-gso-tcpv4",
+ "%d", sg);
+ if (err) {
+ message = "writing feature-gso-tcpv4";
+ goto abort_transaction;
+ }
+
+ /* We support rx-copy path. */
+ err = xenbus_printf(xbt, dev->nodename,
+ "feature-rx-copy", "%d", 1);
+ if (err) {
+ message = "writing feature-rx-copy";
+ goto abort_transaction;
+ }
+
+ /*
+ * We don't support rx-flip path (except old guests who don't
+ * grok this feature flag).
+ */
+ err = xenbus_printf(xbt, dev->nodename,
+ "feature-rx-flip", "%d", 0);
+ if (err) {
+ message = "writing feature-rx-flip";
+ goto abort_transaction;
+ }
+
+ err = xenbus_transaction_end(xbt, 0);
+ } while (err == -EAGAIN);
+
+ if (err) {
+ xenbus_dev_fatal(dev, err, "completing transaction");
+ goto fail;
+ }
+
+ netback_probe_accelerators(be, dev);
+
+ err = xenbus_switch_state(dev, XenbusStateInitWait);
+ if (err)
+ goto fail;
+
+ /* This kicks hotplug scripts, so do it immediately. */
+ backend_create_netif(be);
+
+ return 0;
+
+abort_transaction:
+ xenbus_transaction_end(xbt, 1);
+ xenbus_dev_fatal(dev, err, "%s", message);
+fail:
+ DPRINTK("failed");
+ netback_remove(dev);
+ return err;
+}
+
+
+/**
+ * Handle the creation of the hotplug script environment. We add the script
+ * and vif variables to the environment, for the benefit of the vif-* hotplug
+ * scripts.
+ */
+static int netback_uevent(struct xenbus_device *xdev, char **envp,
+ int num_envp, char *buffer, int buffer_size)
+{
+ struct backend_info *be = xdev->dev.driver_data;
+ netif_t *netif = be->netif;
+ int i = 0, length = 0;
+ char *val;
+
+ DPRINTK("netback_uevent");
+
+ val = xenbus_read(XBT_NIL, xdev->nodename, "script", NULL);
+ if (IS_ERR(val)) {
+ int err = PTR_ERR(val);
+ xenbus_dev_fatal(xdev, err, "reading script");
+ return err;
+ }
+ else {
+ add_uevent_var(envp, num_envp, &i, buffer, buffer_size,
+ &length, "script=%s", val);
+ kfree(val);
+ }
+
+ add_uevent_var(envp, num_envp, &i, buffer, buffer_size, &length,
+ "vif=%s", netif->dev->name);
+
+ envp[i] = NULL;
+
+ return 0;
+}
+
+
+static void backend_create_netif(struct backend_info *be)
+{
+ int err;
+ long handle;
+ struct xenbus_device *dev = be->dev;
+
+ if (be->netif != NULL)
+ return;
+
+ err = xenbus_scanf(XBT_NIL, dev->nodename, "handle", "%li", &handle);
+ if (err != 1) {
+ xenbus_dev_fatal(dev, err, "reading handle");
+ return;
+ }
+
+ be->netif = netif_alloc(dev->otherend_id, handle);
+ if (IS_ERR(be->netif)) {
+ err = PTR_ERR(be->netif);
+ be->netif = NULL;
+ xenbus_dev_fatal(dev, err, "creating interface");
+ return;
+ }
+
+ kobject_uevent(&dev->dev.kobj, KOBJ_ONLINE);
+}
+
+
+/**
+ * Callback received when the frontend's state changes.
+ */
+static void frontend_changed(struct xenbus_device *dev,
+ enum xenbus_state frontend_state)
+{
+ struct backend_info *be = dev->dev.driver_data;
+
+ DPRINTK("%s", xenbus_strstate(frontend_state));
+
+ be->frontend_state = frontend_state;
+
+ switch (frontend_state) {
+ case XenbusStateInitialising:
+ if (dev->state == XenbusStateClosed) {
+ printk(KERN_INFO "%s: %s: prepare for reconnect\n",
+ __FUNCTION__, dev->nodename);
+ xenbus_switch_state(dev, XenbusStateInitWait);
+ }
+ break;
+
+ case XenbusStateInitialised:
+ break;
+
+ case XenbusStateConnected:
+ if (dev->state == XenbusStateConnected)
+ break;
+ backend_create_netif(be);
+ if (be->netif)
+ connect(be);
+ break;
+
+ case XenbusStateClosing:
+ if (be->netif) {
+ kobject_uevent(&dev->dev.kobj, KOBJ_OFFLINE);
+ netif_disconnect(be->netif);
+ be->netif = NULL;
+ }
+ xenbus_switch_state(dev, XenbusStateClosing);
+ break;
+
+ case XenbusStateClosed:
+ xenbus_switch_state(dev, XenbusStateClosed);
+ if (xenbus_dev_is_online(dev))
+ break;
+ /* fall through if not online */
+ case XenbusStateUnknown:
+ device_unregister(&dev->dev);
+ break;
+
+ default:
+ xenbus_dev_fatal(dev, -EINVAL, "saw state %d at frontend",
+ frontend_state);
+ break;
+ }
+}
+
+
+static void xen_net_read_rate(struct xenbus_device *dev,
+ unsigned long *bytes, unsigned long *usec)
+{
+ char *s, *e;
+ unsigned long b, u;
+ char *ratestr;
+
+ /* Default to unlimited bandwidth. */
+ *bytes = ~0UL;
+ *usec = 0;
+
+ ratestr = xenbus_read(XBT_NIL, dev->nodename, "rate", NULL);
+ if (IS_ERR(ratestr))
+ return;
+
+ s = ratestr;
+ b = simple_strtoul(s, &e, 10);
+ if ((s == e) || (*e != ','))
+ goto fail;
+
+ s = e + 1;
+ u = simple_strtoul(s, &e, 10);
+ if ((s == e) || (*e != '\0'))
+ goto fail;
+
+ *bytes = b;
+ *usec = u;
+
+ kfree(ratestr);
+ return;
+
+ fail:
+ WPRINTK("Failed to parse network rate limit. Traffic unlimited.\n");
+ kfree(ratestr);
+}
+
+static int xen_net_read_mac(struct xenbus_device *dev, u8 mac[])
+{
+ char *s, *e, *macstr;
+ int i;
+
+ macstr = s = xenbus_read(XBT_NIL, dev->nodename, "mac", NULL);
+ if (IS_ERR(macstr))
+ return PTR_ERR(macstr);
+
+ for (i = 0; i < ETH_ALEN; i++) {
+ mac[i] = simple_strtoul(s, &e, 16);
+ if ((s == e) || (*e != ((i == ETH_ALEN-1) ? '\0' : ':'))) {
+ kfree(macstr);
+ return -ENOENT;
+ }
+ s = e+1;
+ }
+
+ kfree(macstr);
+ return 0;
+}
+
+static void connect(struct backend_info *be)
+{
+ int err;
+ struct xenbus_device *dev = be->dev;
+
+ err = connect_rings(be);
+ if (err)
+ return;
+
+ err = xen_net_read_mac(dev, be->netif->fe_dev_addr);
+ if (err) {
+ xenbus_dev_fatal(dev, err, "parsing %s/mac", dev->nodename);
+ return;
+ }
+
+ xen_net_read_rate(dev, &be->netif->credit_bytes,
+ &be->netif->credit_usec);
+ be->netif->remaining_credit = be->netif->credit_bytes;
+
+ xenbus_switch_state(dev, XenbusStateConnected);
+
+ netif_wake_queue(be->netif->dev);
+}
+
+
+static int connect_rings(struct backend_info *be)
+{
+ struct xenbus_device *dev = be->dev;
+ unsigned long tx_ring_ref, rx_ring_ref;
+ unsigned int evtchn, rx_copy;
+ int err;
+ int val;
+
+ DPRINTK("");
+
+ err = xenbus_gather(XBT_NIL, dev->otherend,
+ "tx-ring-ref", "%lu", &tx_ring_ref,
+ "rx-ring-ref", "%lu", &rx_ring_ref,
+ "event-channel", "%u", &evtchn, NULL);
+ if (err) {
+ xenbus_dev_fatal(dev, err,
+ "reading %s/ring-ref and event-channel",
+ dev->otherend);
+ return err;
+ }
+
+ err = xenbus_scanf(XBT_NIL, dev->otherend, "request-rx-copy", "%u",
+ &rx_copy);
+ if (err == -ENOENT) {
+ err = 0;
+ rx_copy = 0;
+ }
+ if (err < 0) {
+ xenbus_dev_fatal(dev, err, "reading %s/request-rx-copy",
+ dev->otherend);
+ return err;
+ }
+ be->netif->copying_receiver = !!rx_copy;
+
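+ /*
+ * A frontend that advertises feature-rx-notify kicks us whenever it
+ * posts new receive buffers, so it is safe to queue packets in the
+ * backend; otherwise keep the device tx queue as short as possible.
+ */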
+ if (be->netif->dev->tx_queue_len != 0) {
+ if (xenbus_scanf(XBT_NIL, dev->otherend,
+ "feature-rx-notify", "%d", &val) < 0)
+ val = 0;
+ if (val)
+ be->netif->can_queue = 1;
+ else
+ /* Must be non-zero for pfifo_fast to work. */
+ be->netif->dev->tx_queue_len = 1;
+ }
+
+ if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-sg", "%d", &val) < 0)
+ val = 0;
+ if (val) {
+ be->netif->features |= NETIF_F_SG;
+ be->netif->dev->features |= NETIF_F_SG;
+ }
+
+ if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-gso-tcpv4", "%d",
+ &val) < 0)
+ val = 0;
+ if (val) {
+ be->netif->features |= NETIF_F_TSO;
+ be->netif->dev->features |= NETIF_F_TSO;
+ }
+
+ if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-no-csum-offload",
+ "%d", &val) < 0)
+ val = 0;
+ if (val) {
+ be->netif->features &= ~NETIF_F_IP_CSUM;
+ be->netif->dev->features &= ~NETIF_F_IP_CSUM;
+ }
+
+ /* Map the shared frame, irq etc. */
+ err = netif_map(be->netif, tx_ring_ref, rx_ring_ref, evtchn);
+ if (err) {
+ xenbus_dev_fatal(dev, err,
+ "mapping shared-frames %lu/%lu port %u",
+ tx_ring_ref, rx_ring_ref, evtchn);
+ return err;
+ }
+ return 0;
+}
+
+
+/* ** Driver Registration ** */
+
+
+static const struct xenbus_device_id netback_ids[] = {
+ { "vif" },
+ { "" }
+};
+
+
+static struct xenbus_driver netback = {
+ .name = "vif",
+ .owner = THIS_MODULE,
+ .ids = netback_ids,
+ .probe = netback_probe,
+ .remove = netback_remove,
+ .uevent = netback_uevent,
+ .otherend_changed = frontend_changed,
+};
+
+
+void netif_xenbus_init(void)
+{
+ xenbus_register_backend(&netback);
+}
--
1.7.4
From 5b30803bf5f58ee980edd8d88a2d73dda995ee93 Mon Sep 17 00:00:00 2001
From: Ian Campbell <ian.campbell@citrix.com>
Date: Mon, 9 Feb 2009 12:05:52 -0800
Subject: [PATCH 059/197] xen: netback: first cut at porting to upstream and cleaning up
Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
---
drivers/xen/Kconfig | 2 +-
drivers/xen/netback/Makefile | 2 +-
drivers/xen/netback/common.h | 33 +++---
drivers/xen/netback/interface.c | 37 +++---
drivers/xen/netback/netback.c | 248 ++++++++++++++++++++++++---------------
drivers/xen/netback/xenbus.c | 25 ++--
6 files changed, 201 insertions(+), 146 deletions(-)
diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig
index 7e83d43..30290a8 100644
--- a/drivers/xen/Kconfig
+++ b/drivers/xen/Kconfig
@@ -38,7 +38,7 @@ config XEN_BACKEND
to other virtual machines.
config XEN_NETDEV_BACKEND
- bool "Xen backend network device"
+ tristate "Xen backend network device"
depends on XEN_BACKEND && NET
help
Implement the network backend driver, which passes packets
diff --git a/drivers/xen/netback/Makefile b/drivers/xen/netback/Makefile
index f4a0c51..a01a1a3 100644
--- a/drivers/xen/netback/Makefile
+++ b/drivers/xen/netback/Makefile
@@ -1,3 +1,3 @@
obj-$(CONFIG_XEN_NETDEV_BACKEND) := netbk.o
-netbk-y := netback.o xenbus.o interface.o
+netbk-y := netback.o xenbus.o interface.o
diff --git a/drivers/xen/netback/common.h b/drivers/xen/netback/common.h
index 9a54d57..65b88f4 100644
--- a/drivers/xen/netback/common.h
+++ b/drivers/xen/netback/common.h
@@ -43,8 +43,7 @@
#include <asm/io.h>
#include <asm/pgalloc.h>
#include <xen/interface/grant_table.h>
-#include <xen/gnttab.h>
-#include <xen/driver_util.h>
+#include <xen/grant_table.h>
#include <xen/xenbus.h>
#define DPRINTK(_f, _a...) \
@@ -55,7 +54,7 @@
#define WPRINTK(fmt, args...) \
printk(KERN_WARNING "xen_net: " fmt, ##args)
-typedef struct netif_st {
+struct xen_netif {
/* Unique identifier for this interface. */
domid_t domid;
unsigned int handle;
@@ -70,8 +69,8 @@ typedef struct netif_st {
unsigned int irq;
/* The shared rings and indexes. */
- netif_tx_back_ring_t tx;
- netif_rx_back_ring_t rx;
+ struct xen_netif_tx_back_ring tx;
+ struct xen_netif_rx_back_ring rx;
struct vm_struct *tx_comms_area;
struct vm_struct *rx_comms_area;
@@ -103,7 +102,7 @@ typedef struct netif_st {
unsigned int carrier;
wait_queue_head_t waiting_to_free;
-} netif_t;
+};
/*
* Implement our own carrier flag: the network stack's version causes delays
@@ -141,7 +140,7 @@ struct netback_accelerator {
struct backend_info {
struct xenbus_device *dev;
- netif_t *netif;
+ struct xen_netif *netif;
enum xenbus_state frontend_state;
/* State relating to the netback accelerator */
@@ -174,13 +173,13 @@ extern
void netif_accel_init(void);
-#define NET_TX_RING_SIZE __RING_SIZE((netif_tx_sring_t *)0, PAGE_SIZE)
-#define NET_RX_RING_SIZE __RING_SIZE((netif_rx_sring_t *)0, PAGE_SIZE)
+#define NET_TX_RING_SIZE __RING_SIZE((struct xen_netif_tx_sring *)0, PAGE_SIZE)
+#define NET_RX_RING_SIZE __RING_SIZE((struct xen_netif_rx_sring *)0, PAGE_SIZE)
-void netif_disconnect(netif_t *netif);
+void netif_disconnect(struct xen_netif *netif);
-netif_t *netif_alloc(domid_t domid, unsigned int handle);
-int netif_map(netif_t *netif, unsigned long tx_ring_ref,
+struct xen_netif *netif_alloc(domid_t domid, unsigned int handle);
+int netif_map(struct xen_netif *netif, unsigned long tx_ring_ref,
unsigned long rx_ring_ref, unsigned int evtchn);
#define netif_get(_b) (atomic_inc(&(_b)->refcnt))
@@ -195,22 +194,22 @@ void netif_xenbus_init(void);
#define netif_schedulable(netif) \
(netif_running((netif)->dev) && netback_carrier_ok(netif))
-void netif_schedule_work(netif_t *netif);
-void netif_deschedule_work(netif_t *netif);
+void netif_schedule_work(struct xen_netif *netif);
+void netif_deschedule_work(struct xen_netif *netif);
int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev);
struct net_device_stats *netif_be_get_stats(struct net_device *dev);
-irqreturn_t netif_be_int(int irq, void *dev_id, struct pt_regs *regs);
+irqreturn_t netif_be_int(int irq, void *dev_id);
static inline int netbk_can_queue(struct net_device *dev)
{
- netif_t *netif = netdev_priv(dev);
+ struct xen_netif *netif = netdev_priv(dev);
return netif->can_queue;
}
static inline int netbk_can_sg(struct net_device *dev)
{
- netif_t *netif = netdev_priv(dev);
+ struct xen_netif *netif = netdev_priv(dev);
return netif->features & NETIF_F_SG;
}
diff --git a/drivers/xen/netback/interface.c b/drivers/xen/netback/interface.c
index 7e67941..d184ad7 100644
--- a/drivers/xen/netback/interface.c
+++ b/drivers/xen/netback/interface.c
@@ -34,6 +34,9 @@
#include <linux/ethtool.h>
#include <linux/rtnetlink.h>
+#include <xen/events.h>
+#include <asm/xen/hypercall.h>
+
/*
* Module parameter 'queue_length':
*
@@ -51,13 +54,13 @@
static unsigned long netbk_queue_length = 32;
module_param_named(queue_length, netbk_queue_length, ulong, 0);
-static void __netif_up(netif_t *netif)
+static void __netif_up(struct xen_netif *netif)
{
enable_irq(netif->irq);
netif_schedule_work(netif);
}
-static void __netif_down(netif_t *netif)
+static void __netif_down(struct xen_netif *netif)
{
disable_irq(netif->irq);
netif_deschedule_work(netif);
@@ -65,7 +68,7 @@ static void __netif_down(netif_t *netif)
static int net_open(struct net_device *dev)
{
- netif_t *netif = netdev_priv(dev);
+ struct xen_netif *netif = netdev_priv(dev);
if (netback_carrier_ok(netif)) {
__netif_up(netif);
netif_start_queue(dev);
@@ -75,7 +78,7 @@ static int net_open(struct net_device *dev)
static int net_close(struct net_device *dev)
{
- netif_t *netif = netdev_priv(dev);
+ struct xen_netif *netif = netdev_priv(dev);
if (netback_carrier_ok(netif))
__netif_down(netif);
netif_stop_queue(dev);
@@ -95,7 +98,7 @@ static int netbk_change_mtu(struct net_device *dev, int mtu)
static int netbk_set_sg(struct net_device *dev, u32 data)
{
if (data) {
- netif_t *netif = netdev_priv(dev);
+ struct xen_netif *netif = netdev_priv(dev);
if (!(netif->features & NETIF_F_SG))
return -ENOSYS;
@@ -107,7 +110,7 @@ static int netbk_set_sg(struct net_device *dev, u32 data)
static int netbk_set_tso(struct net_device *dev, u32 data)
{
if (data) {
- netif_t *netif = netdev_priv(dev);
+ struct xen_netif *netif = netdev_priv(dev);
if (!(netif->features & NETIF_F_TSO))
return -ENOSYS;
@@ -127,15 +130,15 @@ static struct ethtool_ops network_ethtool_ops =
.get_link = ethtool_op_get_link,
};
-netif_t *netif_alloc(domid_t domid, unsigned int handle)
+struct xen_netif *netif_alloc(domid_t domid, unsigned int handle)
{
int err = 0;
struct net_device *dev;
- netif_t *netif;
+ struct xen_netif *netif;
char name[IFNAMSIZ] = {};
snprintf(name, IFNAMSIZ - 1, "vif%u.%u", domid, handle);
- dev = alloc_netdev(sizeof(netif_t), name, ether_setup);
+ dev = alloc_netdev(sizeof(struct xen_netif), name, ether_setup);
if (dev == NULL) {
DPRINTK("Could not create netif: out of memory\n");
return ERR_PTR(-ENOMEM);
@@ -194,7 +197,7 @@ netif_t *netif_alloc(domid_t domid, unsigned int handle)
}
static int map_frontend_pages(
- netif_t *netif, grant_ref_t tx_ring_ref, grant_ref_t rx_ring_ref)
+ struct xen_netif *netif, grant_ref_t tx_ring_ref, grant_ref_t rx_ring_ref)
{
struct gnttab_map_grant_ref op;
@@ -229,7 +232,7 @@ static int map_frontend_pages(
return 0;
}
-static void unmap_frontend_pages(netif_t *netif)
+static void unmap_frontend_pages(struct xen_netif *netif)
{
struct gnttab_unmap_grant_ref op;
@@ -246,12 +249,12 @@ static void unmap_frontend_pages(netif_t *netif)
BUG();
}
-int netif_map(netif_t *netif, unsigned long tx_ring_ref,
+int netif_map(struct xen_netif *netif, unsigned long tx_ring_ref,
unsigned long rx_ring_ref, unsigned int evtchn)
{
int err = -ENOMEM;
- netif_tx_sring_t *txs;
- netif_rx_sring_t *rxs;
+ struct xen_netif_tx_sring *txs;
+ struct xen_netif_rx_sring *rxs;
/* Already connected through? */
if (netif->irq)
@@ -276,10 +279,10 @@ int netif_map(netif_t *netif, unsigned long tx_ring_ref,
netif->irq = err;
disable_irq(netif->irq);
- txs = (netif_tx_sring_t *)netif->tx_comms_area->addr;
+ txs = (struct xen_netif_tx_sring *)netif->tx_comms_area->addr;
BACK_RING_INIT(&netif->tx, txs, PAGE_SIZE);
- rxs = (netif_rx_sring_t *)
+ rxs = (struct xen_netif_rx_sring *)
((char *)netif->rx_comms_area->addr);
BACK_RING_INIT(&netif->rx, rxs, PAGE_SIZE);
@@ -303,7 +306,7 @@ err_rx:
return err;
}
-void netif_disconnect(netif_t *netif)
+void netif_disconnect(struct xen_netif *netif)
{
if (netback_carrier_ok(netif)) {
rtnl_lock();
diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
index db629d4..c959075 100644
--- a/drivers/xen/netback/netback.c
+++ b/drivers/xen/netback/netback.c
@@ -35,9 +35,17 @@
*/
#include "common.h"
+
+#include <linux/tcp.h>
+#include <linux/udp.h>
+
#include <xen/balloon.h>
+#include <xen/events.h>
#include <xen/interface/memory.h>
+#include <asm/xen/hypercall.h>
+#include <asm/xen/page.h>
+
/*define NETBE_DEBUG_INTERRUPT*/
struct netbk_rx_meta {
@@ -51,11 +59,12 @@ struct netbk_tx_pending_inuse {
unsigned long alloc_time;
};
+
static void netif_idx_release(u16 pending_idx);
-static void make_tx_response(netif_t *netif,
- netif_tx_request_t *txp,
+static void make_tx_response(struct xen_netif *netif,
+ struct xen_netif_tx_request *txp,
s8 st);
-static netif_rx_response_t *make_rx_response(netif_t *netif,
+static struct xen_netif_rx_response *make_rx_response(struct xen_netif *netif,
u16 id,
s8 st,
u16 offset,
@@ -108,8 +117,8 @@ static inline int netif_page_index(struct page *pg)
#define PKT_PROT_LEN 64
static struct pending_tx_info {
- netif_tx_request_t req;
- netif_t *netif;
+ struct xen_netif_tx_request req;
+ struct xen_netif *netif;
} pending_tx_info[MAX_PENDING_REQS];
static u16 pending_ring[MAX_PENDING_REQS];
typedef unsigned int PEND_RING_IDX;
@@ -128,8 +137,8 @@ static LIST_HEAD(pending_inuse_head);
static struct sk_buff_head tx_queue;
static grant_handle_t grant_tx_handle[MAX_PENDING_REQS];
-static gnttab_unmap_grant_ref_t tx_unmap_ops[MAX_PENDING_REQS];
-static gnttab_map_grant_ref_t tx_map_ops[MAX_PENDING_REQS];
+static struct gnttab_unmap_grant_ref tx_unmap_ops[MAX_PENDING_REQS];
+static struct gnttab_map_grant_ref tx_map_ops[MAX_PENDING_REQS];
static struct list_head net_schedule_list;
static spinlock_t net_schedule_list_lock;
@@ -195,7 +204,7 @@ static struct sk_buff *netbk_copy_skb(struct sk_buff *skb)
goto err;
skb_reserve(nskb, 16 + NET_IP_ALIGN);
- headlen = nskb->end - nskb->data;
+ headlen = skb_end_pointer(nskb) - nskb->data;
if (headlen > skb_headlen(skb))
headlen = skb_headlen(skb);
ret = skb_copy_bits(skb, 0, __skb_put(nskb, headlen), headlen);
@@ -243,9 +252,9 @@ static struct sk_buff *netbk_copy_skb(struct sk_buff *skb)
offset = nskb->data - skb->data;
- nskb->h.raw = skb->h.raw + offset;
- nskb->nh.raw = skb->nh.raw + offset;
- nskb->mac.raw = skb->mac.raw + offset;
+ nskb->transport_header = skb->transport_header + offset;
+ nskb->network_header = skb->network_header + offset;
+ nskb->mac_header = skb->mac_header + offset;
return nskb;
@@ -255,14 +264,14 @@ static struct sk_buff *netbk_copy_skb(struct sk_buff *skb)
return NULL;
}
-static inline int netbk_max_required_rx_slots(netif_t *netif)
+static inline int netbk_max_required_rx_slots(struct xen_netif *netif)
{
if (netif->features & (NETIF_F_SG|NETIF_F_TSO))
return MAX_SKB_FRAGS + 2; /* header + extra_info + frags */
return 1; /* all in one */
}
-static inline int netbk_queue_full(netif_t *netif)
+static inline int netbk_queue_full(struct xen_netif *netif)
{
RING_IDX peek = netif->rx_req_cons_peek;
RING_IDX needed = netbk_max_required_rx_slots(netif);
@@ -273,14 +282,14 @@ static inline int netbk_queue_full(netif_t *netif)
static void tx_queue_callback(unsigned long data)
{
- netif_t *netif = (netif_t *)data;
+ struct xen_netif *netif = (struct xen_netif *)data;
if (netif_schedulable(netif))
netif_wake_queue(netif->dev);
}
int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev)
{
- netif_t *netif = netdev_priv(dev);
+ struct xen_netif *netif = netdev_priv(dev);
BUG_ON(skb->dev != dev);
@@ -302,7 +311,6 @@ int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev)
/* Copy only the header fields we use in this driver. */
nskb->dev = skb->dev;
nskb->ip_summed = skb->ip_summed;
- nskb->proto_data_valid = skb->proto_data_valid;
dev_kfree_skb(skb);
skb = nskb;
}
@@ -366,25 +374,25 @@ struct netrx_pending_operations {
unsigned mcl_prod, mcl_cons;
unsigned copy_prod, copy_cons;
unsigned meta_prod, meta_cons;
- mmu_update_t *mmu;
- gnttab_transfer_t *trans;
- gnttab_copy_t *copy;
- multicall_entry_t *mcl;
+ struct mmu_update *mmu;
+ struct gnttab_transfer *trans;
+ struct gnttab_copy *copy;
+ struct multicall_entry *mcl;
struct netbk_rx_meta *meta;
};
/* Set up the grant operations for this fragment. If it's a flipping
interface, we also set up the unmap request from here. */
-static u16 netbk_gop_frag(netif_t *netif, struct netbk_rx_meta *meta,
+static u16 netbk_gop_frag(struct xen_netif *netif, struct netbk_rx_meta *meta,
int i, struct netrx_pending_operations *npo,
struct page *page, unsigned long size,
unsigned long offset)
{
- mmu_update_t *mmu;
- gnttab_transfer_t *gop;
- gnttab_copy_t *copy_gop;
- multicall_entry_t *mcl;
- netif_rx_request_t *req;
+ struct mmu_update *mmu;
+ struct gnttab_transfer *gop;
+ struct gnttab_copy *copy_gop;
+ struct multicall_entry *mcl;
+ struct xen_netif_rx_request *req;
unsigned long old_mfn, new_mfn;
int idx = netif_page_index(page);
@@ -426,12 +434,12 @@ static u16 netbk_gop_frag(netif_t *netif, struct netbk_rx_meta *meta,
mcl = npo->mcl + npo->mcl_prod++;
MULTI_update_va_mapping(mcl,
(unsigned long)page_address(page),
- pfn_pte_ma(new_mfn, PAGE_KERNEL),
+ mfn_pte(new_mfn, PAGE_KERNEL),
0);
mmu = npo->mmu + npo->mmu_prod++;
- mmu->ptr = ((maddr_t)new_mfn << PAGE_SHIFT) |
- MMU_MACHPHYS_UPDATE;
+ mmu->ptr = ((phys_addr_t)new_mfn << PAGE_SHIFT) |
+ MMU_MACHPHYS_UPDATE;
mmu->val = page_to_pfn(page);
}
@@ -446,7 +454,7 @@ static u16 netbk_gop_frag(netif_t *netif, struct netbk_rx_meta *meta,
static void netbk_gop_skb(struct sk_buff *skb,
struct netrx_pending_operations *npo)
{
- netif_t *netif = netdev_priv(skb->dev);
+ struct xen_netif *netif = netdev_priv(skb->dev);
int nr_frags = skb_shinfo(skb)->nr_frags;
int i;
int extra;
@@ -494,9 +502,9 @@ static inline void netbk_free_pages(int nr_frags, struct netbk_rx_meta *meta)
static int netbk_check_gop(int nr_frags, domid_t domid,
struct netrx_pending_operations *npo)
{
- multicall_entry_t *mcl;
- gnttab_transfer_t *gop;
- gnttab_copy_t *copy_op;
+ struct multicall_entry *mcl;
+ struct gnttab_transfer *gop;
+ struct gnttab_copy *copy_op;
int status = NETIF_RSP_OKAY;
int i;
@@ -534,7 +542,7 @@ static int netbk_check_gop(int nr_frags, domid_t domid,
return status;
}
-static void netbk_add_frag_responses(netif_t *netif, int status,
+static void netbk_add_frag_responses(struct xen_netif *netif, int status,
struct netbk_rx_meta *meta, int nr_frags)
{
int i;
@@ -555,11 +563,11 @@ static void netbk_add_frag_responses(netif_t *netif, int status,
static void net_rx_action(unsigned long unused)
{
- netif_t *netif = NULL;
+ struct xen_netif *netif = NULL;
s8 status;
u16 id, irq, flags;
- netif_rx_response_t *resp;
- multicall_entry_t *mcl;
+ struct xen_netif_rx_response *resp;
+ struct multicall_entry *mcl;
struct sk_buff_head rxq;
struct sk_buff *skb;
int notify_nr = 0;
@@ -572,10 +580,10 @@ static void net_rx_action(unsigned long unused)
* Putting hundreds of bytes on the stack is considered rude.
* Static works because a tasklet can only be on one CPU at any time.
*/
- static multicall_entry_t rx_mcl[NET_RX_RING_SIZE+3];
- static mmu_update_t rx_mmu[NET_RX_RING_SIZE];
- static gnttab_transfer_t grant_trans_op[NET_RX_RING_SIZE];
- static gnttab_copy_t grant_copy_op[NET_RX_RING_SIZE];
+ static struct multicall_entry rx_mcl[NET_RX_RING_SIZE+3];
+ static struct mmu_update rx_mmu[NET_RX_RING_SIZE];
+ static struct gnttab_transfer grant_trans_op[NET_RX_RING_SIZE];
+ static struct gnttab_copy grant_copy_op[NET_RX_RING_SIZE];
static unsigned char rx_notify[NR_IRQS];
static u16 notify_list[NET_RX_RING_SIZE];
static struct netbk_rx_meta meta[NET_RX_RING_SIZE];
@@ -596,7 +604,7 @@ static void net_rx_action(unsigned long unused)
*(int *)skb->cb = nr_frags;
if (!xen_feature(XENFEAT_auto_translated_physmap) &&
- !((netif_t *)netdev_priv(skb->dev))->copying_receiver &&
+ !((struct xen_netif *)netdev_priv(skb->dev))->copying_receiver &&
check_mfn(nr_frags + 1)) {
/* Memory squeeze? Back off for an arbitrary while. */
if ( net_ratelimit() )
@@ -692,9 +700,10 @@ static void net_rx_action(unsigned long unused)
id = meta[npo.meta_cons].id;
flags = nr_frags ? NETRXF_more_data : 0;
- if (skb->ip_summed == CHECKSUM_HW) /* local packet? */
+ if (skb->ip_summed == CHECKSUM_PARTIAL) /* local packet? */
flags |= NETRXF_csum_blank | NETRXF_data_validated;
- else if (skb->proto_data_valid) /* remote but checksummed? */
+ else if (skb->ip_summed == CHECKSUM_UNNECESSARY)
+ /* remote but checksummed. */
flags |= NETRXF_data_validated;
if (meta[npo.meta_cons].copy)
@@ -705,8 +714,8 @@ static void net_rx_action(unsigned long unused)
skb_headlen(skb), flags);
if (meta[npo.meta_cons].frag.size) {
- struct netif_extra_info *gso =
- (struct netif_extra_info *)
+ struct xen_netif_extra_info *gso =
+ (struct xen_netif_extra_info *)
RING_GET_RESPONSE(&netif->rx,
netif->rx.rsp_prod_pvt++);
@@ -769,16 +778,16 @@ static void netbk_tx_pending_timeout(unsigned long unused)
struct net_device_stats *netif_be_get_stats(struct net_device *dev)
{
- netif_t *netif = netdev_priv(dev);
+ struct xen_netif *netif = netdev_priv(dev);
return &netif->stats;
}
-static int __on_net_schedule_list(netif_t *netif)
+static int __on_net_schedule_list(struct xen_netif *netif)
{
return netif->list.next != NULL;
}
-static void remove_from_net_schedule_list(netif_t *netif)
+static void remove_from_net_schedule_list(struct xen_netif *netif)
{
spin_lock_irq(&net_schedule_list_lock);
if (likely(__on_net_schedule_list(netif))) {
@@ -789,7 +798,7 @@ static void remove_from_net_schedule_list(netif_t *netif)
spin_unlock_irq(&net_schedule_list_lock);
}
-static void add_to_net_schedule_list_tail(netif_t *netif)
+static void add_to_net_schedule_list_tail(struct xen_netif *netif)
{
if (__on_net_schedule_list(netif))
return;
@@ -811,7 +820,7 @@ static void add_to_net_schedule_list_tail(netif_t *netif)
* If we may be buffer transmit buffers for any reason then we must be rather
* more conservative and treat this as the final check for pending work.
*/
-void netif_schedule_work(netif_t *netif)
+void netif_schedule_work(struct xen_netif *netif)
{
int more_to_do;
@@ -827,13 +836,13 @@ void netif_schedule_work(netif_t *netif)
}
}
-void netif_deschedule_work(netif_t *netif)
+void netif_deschedule_work(struct xen_netif *netif)
{
remove_from_net_schedule_list(netif);
}
-static void tx_add_credit(netif_t *netif)
+static void tx_add_credit(struct xen_netif *netif)
{
unsigned long max_burst, max_credit;
@@ -855,7 +864,7 @@ static void tx_add_credit(netif_t *netif)
static void tx_credit_callback(unsigned long data)
{
- netif_t *netif = (netif_t *)data;
+ struct xen_netif *netif = (struct xen_netif *)data;
tx_add_credit(netif);
netif_schedule_work(netif);
}
@@ -869,10 +878,10 @@ static inline int copy_pending_req(PEND_RING_IDX pending_idx)
inline static void net_tx_action_dealloc(void)
{
struct netbk_tx_pending_inuse *inuse, *n;
- gnttab_unmap_grant_ref_t *gop;
+ struct gnttab_unmap_grant_ref *gop;
u16 pending_idx;
PEND_RING_IDX dc, dp;
- netif_t *netif;
+ struct xen_netif *netif;
int ret;
LIST_HEAD(list);
@@ -954,7 +963,7 @@ inline static void net_tx_action_dealloc(void)
}
}
-static void netbk_tx_err(netif_t *netif, netif_tx_request_t *txp, RING_IDX end)
+static void netbk_tx_err(struct xen_netif *netif, struct xen_netif_tx_request *txp, RING_IDX end)
{
RING_IDX cons = netif->tx.req_cons;
@@ -969,8 +978,8 @@ static void netbk_tx_err(netif_t *netif, netif_tx_request_t *txp, RING_IDX end)
netif_put(netif);
}
-static int netbk_count_requests(netif_t *netif, netif_tx_request_t *first,
- netif_tx_request_t *txp, int work_to_do)
+static int netbk_count_requests(struct xen_netif *netif, struct xen_netif_tx_request *first,
+ struct xen_netif_tx_request *txp, int work_to_do)
{
RING_IDX cons = netif->tx.req_cons;
int frags = 0;
@@ -1009,10 +1018,10 @@ static int netbk_count_requests(netif_t *netif, netif_tx_request_t *first,
return frags;
}
-static gnttab_map_grant_ref_t *netbk_get_requests(netif_t *netif,
+static struct gnttab_map_grant_ref *netbk_get_requests(struct xen_netif *netif,
struct sk_buff *skb,
- netif_tx_request_t *txp,
- gnttab_map_grant_ref_t *mop)
+ struct xen_netif_tx_request *txp,
+ struct gnttab_map_grant_ref *mop)
{
struct skb_shared_info *shinfo = skb_shinfo(skb);
skb_frag_t *frags = shinfo->frags;
@@ -1039,12 +1048,12 @@ static gnttab_map_grant_ref_t *netbk_get_requests(netif_t *netif,
}
static int netbk_tx_check_mop(struct sk_buff *skb,
- gnttab_map_grant_ref_t **mopp)
+ struct gnttab_map_grant_ref **mopp)
{
- gnttab_map_grant_ref_t *mop = *mopp;
+ struct gnttab_map_grant_ref *mop = *mopp;
int pending_idx = *((u16 *)skb->data);
- netif_t *netif = pending_tx_info[pending_idx].netif;
- netif_tx_request_t *txp;
+ struct xen_netif *netif = pending_tx_info[pending_idx].netif;
+ struct xen_netif_tx_request *txp;
struct skb_shared_info *shinfo = skb_shinfo(skb);
int nr_frags = shinfo->nr_frags;
int i, err, start;
@@ -1118,7 +1127,7 @@ static void netbk_fill_frags(struct sk_buff *skb)
for (i = 0; i < nr_frags; i++) {
skb_frag_t *frag = shinfo->frags + i;
- netif_tx_request_t *txp;
+ struct xen_netif_tx_request *txp;
unsigned long pending_idx;
pending_idx = (unsigned long)frag->page;
@@ -1138,10 +1147,10 @@ static void netbk_fill_frags(struct sk_buff *skb)
}
}
-int netbk_get_extras(netif_t *netif, struct netif_extra_info *extras,
+int netbk_get_extras(struct xen_netif *netif, struct xen_netif_extra_info *extras,
int work_to_do)
{
- struct netif_extra_info extra;
+ struct xen_netif_extra_info extra;
RING_IDX cons = netif->tx.req_cons;
do {
@@ -1166,7 +1175,7 @@ int netbk_get_extras(netif_t *netif, struct netif_extra_info *extras,
return work_to_do;
}
-static int netbk_set_skb_gso(struct sk_buff *skb, struct netif_extra_info *gso)
+static int netbk_set_skb_gso(struct sk_buff *skb, struct xen_netif_extra_info *gso)
{
if (!gso->u.gso.size) {
DPRINTK("GSO size must not be zero.\n");
@@ -1189,18 +1198,57 @@ static int netbk_set_skb_gso(struct sk_buff *skb, struct netif_extra_info *gso)
return 0;
}
+static int skb_checksum_setup(struct sk_buff *skb)
+{
+ struct iphdr *iph;
+ unsigned char *th;
+ int err = -EPROTO;
+
+ if (skb->protocol != htons(ETH_P_IP))
+ goto out;
+
+ iph = (void *)skb->data;
+ th = skb->data + 4 * iph->ihl;
+ if (th >= skb_tail_pointer(skb))
+ goto out;
+
+ skb->csum_start = th - skb->head;
+ switch (iph->protocol) {
+ case IPPROTO_TCP:
+ skb->csum_offset = offsetof(struct tcphdr, check);
+ break;
+ case IPPROTO_UDP:
+ skb->csum_offset = offsetof(struct udphdr, check);
+ break;
+ default:
+ if (net_ratelimit())
+ printk(KERN_ERR "Attempting to checksum a non-"
+ "TCP/UDP packet, dropping a protocol"
+ " %d packet", iph->protocol);
+ goto out;
+ }
+
+ if ((th + skb->csum_offset + 2) > skb_tail_pointer(skb))
+ goto out;
+
+ err = 0;
+
+out:
+ return err;
+}
+
/* Called after netfront has transmitted */
static void net_tx_action(unsigned long unused)
{
struct list_head *ent;
struct sk_buff *skb;
- netif_t *netif;
- netif_tx_request_t txreq;
- netif_tx_request_t txfrags[MAX_SKB_FRAGS];
- struct netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX - 1];
+ struct xen_netif *netif;
+ struct xen_netif_tx_request txreq;
+ struct xen_netif_tx_request txfrags[MAX_SKB_FRAGS];
+ struct xen_netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX - 1];
u16 pending_idx;
RING_IDX i;
- gnttab_map_grant_ref_t *mop;
+ struct gnttab_map_grant_ref *mop;
unsigned int data_len;
int ret, work_to_do;
@@ -1212,7 +1260,7 @@ static void net_tx_action(unsigned long unused)
!list_empty(&net_schedule_list)) {
/* Get a netif from the list with work to do. */
ent = net_schedule_list.next;
- netif = list_entry(ent, netif_t, list);
+ netif = list_entry(ent, struct xen_netif, list);
netif_get(netif);
remove_from_net_schedule_list(netif);
@@ -1313,7 +1361,7 @@ static void net_tx_action(unsigned long unused)
skb_reserve(skb, 16 + NET_IP_ALIGN);
if (extras[XEN_NETIF_EXTRA_TYPE_GSO - 1].type) {
- struct netif_extra_info *gso;
+ struct xen_netif_extra_info *gso;
gso = &extras[XEN_NETIF_EXTRA_TYPE_GSO - 1];
if (netbk_set_skb_gso(skb, gso)) {
@@ -1372,7 +1420,7 @@ static void net_tx_action(unsigned long unused)
mop = tx_map_ops;
while ((skb = __skb_dequeue(&tx_queue)) != NULL) {
- netif_tx_request_t *txp;
+ struct xen_netif_tx_request *txp;
pending_idx = *((u16 *)skb->data);
netif = pending_tx_info[pending_idx].netif;
@@ -1403,14 +1451,10 @@ static void net_tx_action(unsigned long unused)
* Old frontends do not assert data_validated but we
* can infer it from csum_blank so test both flags.
*/
- if (txp->flags & (NETTXF_data_validated|NETTXF_csum_blank)) {
- skb->ip_summed = CHECKSUM_UNNECESSARY;
- skb->proto_data_valid = 1;
- } else {
+ if (txp->flags & (NETTXF_data_validated|NETTXF_csum_blank))
+ skb->ip_summed = CHECKSUM_PARTIAL;
+ else
skb->ip_summed = CHECKSUM_NONE;
- skb->proto_data_valid = 0;
- }
- skb->proto_csum_blank = !!(txp->flags & NETTXF_csum_blank);
netbk_fill_frags(skb);
@@ -1420,6 +1464,14 @@ static void net_tx_action(unsigned long unused)
netif->stats.rx_bytes += skb->len;
netif->stats.rx_packets++;
+ if (skb->ip_summed == CHECKSUM_PARTIAL) {
+ if (skb_checksum_setup(skb)) {
+ DPRINTK("Can't setup checksum in net_tx_action\n");
+ kfree_skb(skb);
+ continue;
+ }
+ }
+
if (unlikely(netbk_copy_skb_mode == NETBK_ALWAYS_COPY_SKB) &&
unlikely(skb_linearize(skb))) {
DPRINTK("Can't linearize skb in net_tx_action.\n");
@@ -1464,9 +1516,9 @@ static void netif_page_release(struct page *page, unsigned int order)
netif_idx_release(idx);
}
-irqreturn_t netif_be_int(int irq, void *dev_id, struct pt_regs *regs)
+irqreturn_t netif_be_int(int irq, void *dev_id)
{
- netif_t *netif = dev_id;
+ struct xen_netif *netif = dev_id;
add_to_net_schedule_list_tail(netif);
maybe_schedule_tx_action();
@@ -1477,12 +1529,12 @@ irqreturn_t netif_be_int(int irq, void *dev_id, struct pt_regs *regs)
return IRQ_HANDLED;
}
-static void make_tx_response(netif_t *netif,
- netif_tx_request_t *txp,
+static void make_tx_response(struct xen_netif *netif,
+ struct xen_netif_tx_request *txp,
s8 st)
{
RING_IDX i = netif->tx.rsp_prod_pvt;
- netif_tx_response_t *resp;
+ struct xen_netif_tx_response *resp;
int notify;
resp = RING_GET_RESPONSE(&netif->tx, i);
@@ -1507,7 +1559,7 @@ static void make_tx_response(netif_t *netif,
#endif
}
-static netif_rx_response_t *make_rx_response(netif_t *netif,
+static struct xen_netif_rx_response *make_rx_response(struct xen_netif *netif,
u16 id,
s8 st,
u16 offset,
@@ -1515,7 +1567,7 @@ static netif_rx_response_t *make_rx_response(netif_t *netif,
u16 flags)
{
RING_IDX i = netif->rx.rsp_prod_pvt;
- netif_rx_response_t *resp;
+ struct xen_netif_rx_response *resp;
resp = RING_GET_RESPONSE(&netif->rx, i);
resp->offset = offset;
@@ -1534,14 +1586,14 @@ static netif_rx_response_t *make_rx_response(netif_t *netif,
static irqreturn_t netif_be_dbg(int irq, void *dev_id, struct pt_regs *regs)
{
struct list_head *ent;
- netif_t *netif;
+ struct xen_netif *netif;
int i = 0;
printk(KERN_ALERT "netif_schedule_list:\n");
spin_lock_irq(&net_schedule_list_lock);
list_for_each (ent, &net_schedule_list) {
- netif = list_entry(ent, netif_t, list);
+ netif = list_entry(ent, struct xen_netif, list);
printk(KERN_ALERT " %d: private(rx_req_cons=%08x "
"rx_resp_prod=%08x\n",
i, netif->rx.req_cons, netif->rx.rsp_prod_pvt);
@@ -1569,11 +1621,13 @@ static int __init netback_init(void)
int i;
struct page *page;
- if (!is_running_on_xen())
+ printk(KERN_CRIT "*** netif_init\n");
+
+ if (!xen_domain())
return -ENODEV;
/* We can increase reservation by this much in net_rx_action(). */
- balloon_update_driver_allowance(NET_RX_RING_SIZE);
+// balloon_update_driver_allowance(NET_RX_RING_SIZE);
skb_queue_head_init(&rx_queue);
skb_queue_head_init(&tx_queue);
@@ -1616,7 +1670,7 @@ static int __init netback_init(void)
netbk_copy_skb_mode = NETBK_DELAYED_COPY_SKB;
}
- netif_accel_init();
+ //netif_accel_init();
netif_xenbus_init();
diff --git a/drivers/xen/netback/xenbus.c b/drivers/xen/netback/xenbus.c
index d7faeb6..ed7c006 100644
--- a/drivers/xen/netback/xenbus.c
+++ b/drivers/xen/netback/xenbus.c
@@ -37,7 +37,7 @@ static int netback_remove(struct xenbus_device *dev)
{
struct backend_info *be = dev->dev.driver_data;
- netback_remove_accelerators(be, dev);
+ //netback_remove_accelerators(be, dev);
if (be->netif) {
kobject_uevent(&dev->dev.kobj, KOBJ_OFFLINE);
@@ -123,7 +123,7 @@ static int netback_probe(struct xenbus_device *dev,
goto fail;
}
- netback_probe_accelerators(be, dev);
+ //netback_probe_accelerators(be, dev);
err = xenbus_switch_state(dev, XenbusStateInitWait);
if (err)
@@ -149,12 +149,10 @@ fail:
* and vif variables to the environment, for the benefit of the vif-* hotplug
* scripts.
*/
-static int netback_uevent(struct xenbus_device *xdev, char **envp,
- int num_envp, char *buffer, int buffer_size)
+static int netback_uevent(struct xenbus_device *xdev, struct kobj_uevent_env *env)
{
struct backend_info *be = xdev->dev.driver_data;
- netif_t *netif = be->netif;
- int i = 0, length = 0;
+ struct xen_netif *netif = be->netif;
char *val;
DPRINTK("netback_uevent");
@@ -166,15 +164,15 @@ static int netback_uevent(struct xenbus_device *xdev, char **envp,
return err;
}
else {
- add_uevent_var(envp, num_envp, &i, buffer, buffer_size,
- &length, "script=%s", val);
+ if (add_uevent_var(env, "script=%s", val)) {
+ kfree(val);
+ return -ENOMEM;
+ }
kfree(val);
}
- add_uevent_var(envp, num_envp, &i, buffer, buffer_size, &length,
- "vif=%s", netif->dev->name);
-
- envp[i] = NULL;
+ if (add_uevent_var(env, "vif=%s", netif->dev->name))
+ return -ENOMEM;
return 0;
}
@@ -450,5 +448,6 @@ static struct xenbus_driver netback = {
void netif_xenbus_init(void)
{
- xenbus_register_backend(&netback);
+ printk(KERN_CRIT "registering netback\n");
+ (void)xenbus_register_backend(&netback);
}
--
1.7.4
From a41a2ab9e1ac4ef8320f69f2719e973e25faff5c Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Mon, 9 Feb 2009 16:39:01 -0800
Subject: [PATCH 060/197] xen: netback: don't include xen/evtchn.h
It's a userspace header for users of /dev/evtchn.
Signed-off-by: Jeremy Fitzhardinge <jeremy@goop.org>
---
drivers/xen/netback/common.h | 1 -
1 files changed, 0 insertions(+), 1 deletions(-)
diff --git a/drivers/xen/netback/common.h b/drivers/xen/netback/common.h
index 65b88f4..5665ed1 100644
--- a/drivers/xen/netback/common.h
+++ b/drivers/xen/netback/common.h
@@ -38,7 +38,6 @@
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/wait.h>
-#include <xen/evtchn.h>
#include <xen/interface/io/netif.h>
#include <asm/io.h>
#include <asm/pgalloc.h>
--
1.7.4
From f28a7c6148bb979acf99c0cbe3b441d0fb0853d9 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Date: Wed, 18 Feb 2009 15:55:18 -0800
Subject: [PATCH 061/197] xen: netback: use mod_timer
__mod_timer is no longer a public API.
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
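A minimal sketch of arming a timer with the public helpers, assuming the
2.6.38-era timer API; the field and callback names mirror the driver and the
snippet is illustrative only, not part of the diff below:
	/* Initialise the timer once ... */
	setup_timer(&netif->tx_queue_timeout, tx_queue_callback,
		    (unsigned long)netif);
	/* ... then (re)arm it with the public helper instead of __mod_timer(). */
	mod_timer(&netif->tx_queue_timeout, jiffies + HZ/2);	/* ~500 ms */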
---
drivers/xen/netback/netback.c | 4 ++--
1 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
index c959075..e920703 100644
--- a/drivers/xen/netback/netback.c
+++ b/drivers/xen/netback/netback.c
@@ -334,7 +334,7 @@ int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev)
*/
netif->tx_queue_timeout.data = (unsigned long)netif;
netif->tx_queue_timeout.function = tx_queue_callback;
- __mod_timer(&netif->tx_queue_timeout, jiffies + HZ/2);
+ mod_timer(&netif->tx_queue_timeout, jiffies + HZ/2);
}
}
@@ -1299,7 +1299,7 @@ static void net_tx_action(unsigned long unused)
(unsigned long)netif;
netif->credit_timeout.function =
tx_credit_callback;
- __mod_timer(&netif->credit_timeout,
+ mod_timer(&netif->credit_timeout,
next_credit);
netif_put(netif);
continue;
--
1.7.4
From 52f97ad360f28762c785343ba5c9f8abb83536f3 Mon Sep 17 00:00:00 2001
From: Jan Beulich <jbeulich@novell.com>
Date: Fri, 6 Mar 2009 08:29:31 +0000
Subject: [PATCH 062/197] xen: netback: unmap tx ring gref when mapping of rx ring gref failed
[ijc-ported from linux-2.6.18-xen.hg 782:51decc39e5e7]
Signed-off-by: Jan Beulich <jbeulich@novell.com>
Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
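The point of the unwind is that a failed rx-ring map would otherwise leak the
grant mapping already taken for the tx ring; the added gnttab_set_unmap_op()/
GNTTABOP_unmap_grant_ref pair releases it before returning the error.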
---
drivers/xen/netback/interface.c | 6 ++++++
1 files changed, 6 insertions(+), 0 deletions(-)
diff --git a/drivers/xen/netback/interface.c b/drivers/xen/netback/interface.c
index d184ad7..f3d9ea1 100644
--- a/drivers/xen/netback/interface.c
+++ b/drivers/xen/netback/interface.c
@@ -222,6 +222,12 @@ static int map_frontend_pages(
BUG();
if (op.status) {
+ struct gnttab_unmap_grant_ref unop;
+
+ gnttab_set_unmap_op(&unop,
+ (unsigned long)netif->tx_comms_area->addr,
+ GNTMAP_host_map, netif->tx_shmem_handle);
+ HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &unop, 1);
DPRINTK(" Gnttab failure mapping rx_ring_ref!\n");
return op.status;
}
--
1.7.4
From f9b63790f1404eb03ac824147b2294a46e485643 Mon Sep 17 00:00:00 2001
From: Ian Campbell <Ian.Campbell@citrix.com>
Date: Fri, 6 Mar 2009 08:29:32 +0000
Subject: [PATCH 063/197] xen: netback: add ethtool stat to track copied skbs.
Copied skbs should be rare, but we have no way of verifying that.
[ijc-ported from linux-2.6.18-xen.hg 792:db9857bb0320]
Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
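Once the stat is wired into the ethtool ops it should be readable from
userspace with the standard stats query, e.g. "ethtool -S vif1.0" (interface
name illustrative) listing a copied_skbs counter.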
---
drivers/xen/netback/common.h | 3 ++
drivers/xen/netback/interface.c | 47 +++++++++++++++++++++++++++++++++++++++
drivers/xen/netback/netback.c | 6 ++++-
3 files changed, 55 insertions(+), 1 deletions(-)
diff --git a/drivers/xen/netback/common.h b/drivers/xen/netback/common.h
index 5665ed1..6ba804d 100644
--- a/drivers/xen/netback/common.h
+++ b/drivers/xen/netback/common.h
@@ -92,6 +92,9 @@ struct xen_netif {
/* Enforce draining of the transmit queue. */
struct timer_list tx_queue_timeout;
+ /* Statistics */
+ int nr_copied_skbs;
+
/* Miscellaneous private stuff. */
struct list_head list; /* scheduling list */
atomic_t refcnt;
diff --git a/drivers/xen/netback/interface.c b/drivers/xen/netback/interface.c
index f3d9ea1..1a99c87 100644
--- a/drivers/xen/netback/interface.c
+++ b/drivers/xen/netback/interface.c
@@ -119,8 +119,51 @@ static int netbk_set_tso(struct net_device *dev, u32 data)
return ethtool_op_set_tso(dev, data);
}
+static void netbk_get_drvinfo(struct net_device *dev,
+ struct ethtool_drvinfo *info)
+{
+ strcpy(info->driver, "netbk");
+}
+
+static const struct netif_stat {
+ char name[ETH_GSTRING_LEN];
+ u16 offset;
+} netbk_stats[] = {
+ { "copied_skbs", offsetof(struct xen_netif, nr_copied_skbs) },
+};
+
+static int netbk_get_stats_count(struct net_device *dev)
+{
+ return ARRAY_SIZE(netbk_stats);
+}
+
+static void netbk_get_ethtool_stats(struct net_device *dev,
+ struct ethtool_stats *stats, u64 * data)
+{
+ void *netif = netdev_priv(dev);
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(netbk_stats); i++)
+ data[i] = *(int *)(netif + netbk_stats[i].offset);
+}
+
+static void netbk_get_strings(struct net_device *dev, u32 stringset, u8 * data)
+{
+ int i;
+
+ switch (stringset) {
+ case ETH_SS_STATS:
+ for (i = 0; i < ARRAY_SIZE(netbk_stats); i++)
+ memcpy(data + i * ETH_GSTRING_LEN,
+ netbk_stats[i].name, ETH_GSTRING_LEN);
+ break;
+ }
+}
+
static struct ethtool_ops network_ethtool_ops =
{
+ .get_drvinfo = netbk_get_drvinfo,
+
.get_tx_csum = ethtool_op_get_tx_csum,
.set_tx_csum = ethtool_op_set_tx_csum,
.get_sg = ethtool_op_get_sg,
@@ -128,6 +171,10 @@ static struct ethtool_ops network_ethtool_ops =
.get_tso = ethtool_op_get_tso,
.set_tso = netbk_set_tso,
.get_link = ethtool_op_get_link,
+
+ .get_stats_count = netbk_get_stats_count,
+ .get_ethtool_stats = netbk_get_ethtool_stats,
+ .get_strings = netbk_get_strings,
};
struct xen_netif *netif_alloc(domid_t domid, unsigned int handle)
diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
index e920703..f59fadb 100644
--- a/drivers/xen/netback/netback.c
+++ b/drivers/xen/netback/netback.c
@@ -923,7 +923,11 @@ inline static void net_tx_action_dealloc(void)
if (time_after(inuse->alloc_time + HZ / 2, jiffies))
break;
- switch (copy_pending_req(inuse - pending_inuse)) {
+ pending_idx = inuse - pending_inuse;
+
+ pending_tx_info[pending_idx].netif->nr_copied_skbs++;
+
+ switch (copy_pending_req(pending_idx)) {
case 0:
list_move_tail(&inuse->list, &list);
continue;
--
1.7.4
From c41d8da3d853d4e89ba38693b90c1fe512095704 Mon Sep 17 00:00:00 2001
From: Ian Campbell <Ian.Campbell@citrix.com>
Date: Fri, 6 Mar 2009 08:29:33 +0000
Subject: [PATCH 064/197] xen: netback: make queue length parameter writeable in sysfs
Any changes will only take effect for newly created VIFs.
Also hook up the vif devices to their parent and publish bus info via
ethtool.
[ijc-ported from linux-2.6.18-xen.hg 793:3aa9b8a7876b]
Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
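With the permission bits changed to 0644 the parameter should become
writeable under sysfs, e.g. /sys/module/netbk/parameters/queue_length (module
name assumed here); as noted above, writes only affect VIFs created
afterwards.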
---
drivers/xen/netback/interface.c | 3 ++-
drivers/xen/netback/xenbus.c | 1 +
2 files changed, 3 insertions(+), 1 deletions(-)
diff --git a/drivers/xen/netback/interface.c b/drivers/xen/netback/interface.c
index 1a99c87..7706170 100644
--- a/drivers/xen/netback/interface.c
+++ b/drivers/xen/netback/interface.c
@@ -52,7 +52,7 @@
* blocked.
*/
static unsigned long netbk_queue_length = 32;
-module_param_named(queue_length, netbk_queue_length, ulong, 0);
+module_param_named(queue_length, netbk_queue_length, ulong, 0644);
static void __netif_up(struct xen_netif *netif)
{
@@ -123,6 +123,7 @@ static void netbk_get_drvinfo(struct net_device *dev,
struct ethtool_drvinfo *info)
{
strcpy(info->driver, "netbk");
+ strcpy(info->bus_info, dev->dev.parent->bus_id);
}
static const struct netif_stat {
diff --git a/drivers/xen/netback/xenbus.c b/drivers/xen/netback/xenbus.c
index ed7c006..dc7b367 100644
--- a/drivers/xen/netback/xenbus.c
+++ b/drivers/xen/netback/xenbus.c
@@ -200,6 +200,7 @@ static void backend_create_netif(struct backend_info *be)
xenbus_dev_fatal(dev, err, "creating interface");
return;
}
+ SET_NETDEV_DEV(be->netif->dev, &dev->dev);
kobject_uevent(&dev->dev.kobj, KOBJ_ONLINE);
}
--
1.7.4
From f204d7567ab11ddb1ff3208ab5ed8921b575af5d Mon Sep 17 00:00:00 2001
From: Ian Campbell <Ian.Campbell@citrix.com>
Date: Mon, 16 Mar 2009 22:05:16 +0000
Subject: [PATCH 065/197] xen: netback: parent sysfs device should be set before registering.
Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
---
drivers/xen/netback/common.h | 2 +-
drivers/xen/netback/interface.c | 4 +++-
drivers/xen/netback/xenbus.c | 3 +--
3 files changed, 5 insertions(+), 4 deletions(-)
diff --git a/drivers/xen/netback/common.h b/drivers/xen/netback/common.h
index 6ba804d..123a169 100644
--- a/drivers/xen/netback/common.h
+++ b/drivers/xen/netback/common.h
@@ -180,7 +180,7 @@ void netif_accel_init(void);
void netif_disconnect(struct xen_netif *netif);
-struct xen_netif *netif_alloc(domid_t domid, unsigned int handle);
+struct xen_netif *netif_alloc(struct device *parent, domid_t domid, unsigned int handle);
int netif_map(struct xen_netif *netif, unsigned long tx_ring_ref,
unsigned long rx_ring_ref, unsigned int evtchn);
diff --git a/drivers/xen/netback/interface.c b/drivers/xen/netback/interface.c
index 7706170..5e0d26d 100644
--- a/drivers/xen/netback/interface.c
+++ b/drivers/xen/netback/interface.c
@@ -178,7 +178,7 @@ static struct ethtool_ops network_ethtool_ops =
.get_strings = netbk_get_strings,
};
-struct xen_netif *netif_alloc(domid_t domid, unsigned int handle)
+struct xen_netif *netif_alloc(struct device *parent, domid_t domid, unsigned int handle)
{
int err = 0;
struct net_device *dev;
@@ -192,6 +192,8 @@ struct xen_netif *netif_alloc(domid_t domid, unsigned int handle)
return ERR_PTR(-ENOMEM);
}
+ SET_NETDEV_DEV(dev, parent);
+
netif = netdev_priv(dev);
memset(netif, 0, sizeof(*netif));
netif->domid = domid;
diff --git a/drivers/xen/netback/xenbus.c b/drivers/xen/netback/xenbus.c
index dc7b367..749931e 100644
--- a/drivers/xen/netback/xenbus.c
+++ b/drivers/xen/netback/xenbus.c
@@ -193,14 +193,13 @@ static void backend_create_netif(struct backend_info *be)
return;
}
- be->netif = netif_alloc(dev->otherend_id, handle);
+ be->netif = netif_alloc(&dev->dev, dev->otherend_id, handle);
if (IS_ERR(be->netif)) {
err = PTR_ERR(be->netif);
be->netif = NULL;
xenbus_dev_fatal(dev, err, "creating interface");
return;
}
- SET_NETDEV_DEV(be->netif->dev, &dev->dev);
kobject_uevent(&dev->dev.kobj, KOBJ_ONLINE);
}
--
1.7.4
From bb606178665ea78b505cb54864899478b6020584 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Date: Thu, 19 Mar 2009 12:42:36 -0700
Subject: [PATCH 066/197] xen: netback: use NET_SKB_PAD rather than "16"
There's a constant for the default skb headroom.
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
drivers/xen/netback/netback.c | 6 +++---
1 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
index f59fadb..400f398 100644
--- a/drivers/xen/netback/netback.c
+++ b/drivers/xen/netback/netback.c
@@ -203,7 +203,7 @@ static struct sk_buff *netbk_copy_skb(struct sk_buff *skb)
if (unlikely(!nskb))
goto err;
- skb_reserve(nskb, 16 + NET_IP_ALIGN);
+ skb_reserve(nskb, NET_SKB_PAD + NET_IP_ALIGN);
headlen = skb_end_pointer(nskb) - nskb->data;
if (headlen > skb_headlen(skb))
headlen = skb_headlen(skb);
@@ -1353,7 +1353,7 @@ static void net_tx_action(unsigned long unused)
ret < MAX_SKB_FRAGS) ?
PKT_PROT_LEN : txreq.size;
- skb = alloc_skb(data_len + 16 + NET_IP_ALIGN,
+ skb = alloc_skb(data_len + NET_SKB_PAD + NET_IP_ALIGN,
GFP_ATOMIC | __GFP_NOWARN);
if (unlikely(skb == NULL)) {
DPRINTK("Can't allocate a skb in start_xmit.\n");
@@ -1362,7 +1362,7 @@ static void net_tx_action(unsigned long unused)
}
/* Packets passed to netif_rx() must have some headroom. */
- skb_reserve(skb, 16 + NET_IP_ALIGN);
+ skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN);
if (extras[XEN_NETIF_EXTRA_TYPE_GSO - 1].type) {
struct xen_netif_extra_info *gso;
--
1.7.4
From fe41ab031dfa0c6f9821c2667ce821e7f4f635ed Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Date: Thu, 19 Mar 2009 13:31:26 -0700
Subject: [PATCH 067/197] xen: netback: completely drop flip support
Nobody uses it?
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
drivers/xen/netback/common.h | 1 -
drivers/xen/netback/netback.c | 245 ++++-------------------------------------
drivers/xen/netback/xenbus.c | 3 +-
3 files changed, 22 insertions(+), 227 deletions(-)
diff --git a/drivers/xen/netback/common.h b/drivers/xen/netback/common.h
index 123a169..06f04c1 100644
--- a/drivers/xen/netback/common.h
+++ b/drivers/xen/netback/common.h
@@ -78,7 +78,6 @@ struct xen_netif {
/* Internal feature information. */
u8 can_queue:1; /* can queue packets for receiver? */
- u8 copying_receiver:1; /* copy packets to receiver? */
/* Allow netif_be_start_xmit() to peek ahead in the rx request ring. */
RING_IDX rx_req_cons_peek;
diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
index 400f398..36bea2b 100644
--- a/drivers/xen/netback/netback.c
+++ b/drivers/xen/netback/netback.c
@@ -51,7 +51,6 @@
struct netbk_rx_meta {
skb_frag_t frag;
int id;
- u8 copy:1;
};
struct netbk_tx_pending_inuse {
@@ -160,26 +159,6 @@ static inline unsigned long alloc_mfn(void)
return mfn_list[--alloc_index];
}
-static int check_mfn(int nr)
-{
- struct xen_memory_reservation reservation = {
- .extent_order = 0,
- .domid = DOMID_SELF
- };
- int rc;
-
- if (likely(alloc_index >= nr))
- return 0;
-
- set_xen_guest_handle(reservation.extent_start, mfn_list + alloc_index);
- reservation.nr_extents = MAX_MFN_ALLOC - alloc_index;
- rc = HYPERVISOR_memory_op(XENMEM_increase_reservation, &reservation);
- if (likely(rc > 0))
- alloc_index += rc;
-
- return alloc_index >= nr ? 0 : -ENOMEM;
-}
-
static inline void maybe_schedule_tx_action(void)
{
smp_mb();
@@ -188,82 +167,6 @@ static inline void maybe_schedule_tx_action(void)
tasklet_schedule(&net_tx_tasklet);
}
-static struct sk_buff *netbk_copy_skb(struct sk_buff *skb)
-{
- struct skb_shared_info *ninfo;
- struct sk_buff *nskb;
- unsigned long offset;
- int ret;
- int len;
- int headlen;
-
- BUG_ON(skb_shinfo(skb)->frag_list != NULL);
-
- nskb = alloc_skb(SKB_MAX_HEAD(0), GFP_ATOMIC | __GFP_NOWARN);
- if (unlikely(!nskb))
- goto err;
-
- skb_reserve(nskb, NET_SKB_PAD + NET_IP_ALIGN);
- headlen = skb_end_pointer(nskb) - nskb->data;
- if (headlen > skb_headlen(skb))
- headlen = skb_headlen(skb);
- ret = skb_copy_bits(skb, 0, __skb_put(nskb, headlen), headlen);
- BUG_ON(ret);
-
- ninfo = skb_shinfo(nskb);
- ninfo->gso_size = skb_shinfo(skb)->gso_size;
- ninfo->gso_type = skb_shinfo(skb)->gso_type;
-
- offset = headlen;
- len = skb->len - headlen;
-
- nskb->len = skb->len;
- nskb->data_len = len;
- nskb->truesize += len;
-
- while (len) {
- struct page *page;
- int copy;
- int zero;
-
- if (unlikely(ninfo->nr_frags >= MAX_SKB_FRAGS)) {
- dump_stack();
- goto err_free;
- }
-
- copy = len >= PAGE_SIZE ? PAGE_SIZE : len;
- zero = len >= PAGE_SIZE ? 0 : __GFP_ZERO;
-
- page = alloc_page(GFP_ATOMIC | __GFP_NOWARN | zero);
- if (unlikely(!page))
- goto err_free;
-
- ret = skb_copy_bits(skb, offset, page_address(page), copy);
- BUG_ON(ret);
-
- ninfo->frags[ninfo->nr_frags].page = page;
- ninfo->frags[ninfo->nr_frags].page_offset = 0;
- ninfo->frags[ninfo->nr_frags].size = copy;
- ninfo->nr_frags++;
-
- offset += copy;
- len -= copy;
- }
-
- offset = nskb->data - skb->data;
-
- nskb->transport_header = skb->transport_header + offset;
- nskb->network_header = skb->network_header + offset;
- nskb->mac_header = skb->mac_header + offset;
-
- return nskb;
-
- err_free:
- kfree_skb(nskb);
- err:
- return NULL;
-}
-
static inline int netbk_max_required_rx_slots(struct xen_netif *netif)
{
if (netif->features & (NETIF_F_SG|NETIF_F_TSO))
@@ -297,24 +200,6 @@ int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev)
if (unlikely(!netif_schedulable(netif) || netbk_queue_full(netif)))
goto drop;
- /*
- * Copy the packet here if it's destined for a flipping interface
- * but isn't flippable (e.g. extra references to data).
- * XXX For now we also copy skbuffs whose head crosses a page
- * boundary, because netbk_gop_skb can't handle them.
- */
- if (!netif->copying_receiver ||
- ((skb_headlen(skb) + offset_in_page(skb->data)) >= PAGE_SIZE)) {
- struct sk_buff *nskb = netbk_copy_skb(skb);
- if ( unlikely(nskb == NULL) )
- goto drop;
- /* Copy only the header fields we use in this driver. */
- nskb->dev = skb->dev;
- nskb->ip_summed = skb->ip_summed;
- dev_kfree_skb(skb);
- skb = nskb;
- }
-
netif->rx_req_cons_peek += skb_shinfo(skb)->nr_frags + 1 +
!!skb_shinfo(skb)->gso_size;
netif_get(netif);
@@ -388,66 +273,32 @@ static u16 netbk_gop_frag(struct xen_netif *netif, struct netbk_rx_meta *meta,
struct page *page, unsigned long size,
unsigned long offset)
{
- struct mmu_update *mmu;
- struct gnttab_transfer *gop;
struct gnttab_copy *copy_gop;
- struct multicall_entry *mcl;
struct xen_netif_rx_request *req;
- unsigned long old_mfn, new_mfn;
+ unsigned long old_mfn;
int idx = netif_page_index(page);
old_mfn = virt_to_mfn(page_address(page));
req = RING_GET_REQUEST(&netif->rx, netif->rx.req_cons + i);
- if (netif->copying_receiver) {
- /* The fragment needs to be copied rather than
- flipped. */
- meta->copy = 1;
- copy_gop = npo->copy + npo->copy_prod++;
- copy_gop->flags = GNTCOPY_dest_gref;
- if (idx > -1) {
- struct pending_tx_info *src_pend = &pending_tx_info[idx];
- copy_gop->source.domid = src_pend->netif->domid;
- copy_gop->source.u.ref = src_pend->req.gref;
- copy_gop->flags |= GNTCOPY_source_gref;
- } else {
- copy_gop->source.domid = DOMID_SELF;
- copy_gop->source.u.gmfn = old_mfn;
- }
- copy_gop->source.offset = offset;
- copy_gop->dest.domid = netif->domid;
- copy_gop->dest.offset = 0;
- copy_gop->dest.u.ref = req->gref;
- copy_gop->len = size;
- } else {
- meta->copy = 0;
- if (!xen_feature(XENFEAT_auto_translated_physmap)) {
- new_mfn = alloc_mfn();
-
- /*
- * Set the new P2M table entry before
- * reassigning the old data page. Heed the
- * comment in pgtable-2level.h:pte_page(). :-)
- */
- set_phys_to_machine(page_to_pfn(page), new_mfn);
-
- mcl = npo->mcl + npo->mcl_prod++;
- MULTI_update_va_mapping(mcl,
- (unsigned long)page_address(page),
- mfn_pte(new_mfn, PAGE_KERNEL),
- 0);
-
- mmu = npo->mmu + npo->mmu_prod++;
- mmu->ptr = ((phys_addr_t)new_mfn << PAGE_SHIFT) |
- MMU_MACHPHYS_UPDATE;
- mmu->val = page_to_pfn(page);
- }
- gop = npo->trans + npo->trans_prod++;
- gop->mfn = old_mfn;
- gop->domid = netif->domid;
- gop->ref = req->gref;
+ copy_gop = npo->copy + npo->copy_prod++;
+ copy_gop->flags = GNTCOPY_dest_gref;
+ if (idx > -1) {
+ struct pending_tx_info *src_pend = &pending_tx_info[idx];
+ copy_gop->source.domid = src_pend->netif->domid;
+ copy_gop->source.u.ref = src_pend->req.gref;
+ copy_gop->flags |= GNTCOPY_source_gref;
+ } else {
+ copy_gop->source.domid = DOMID_SELF;
+ copy_gop->source.u.gmfn = old_mfn;
}
+ copy_gop->source.offset = offset;
+ copy_gop->dest.domid = netif->domid;
+ copy_gop->dest.offset = 0;
+ copy_gop->dest.u.ref = req->gref;
+ copy_gop->len = size;
+
return req->id;
}
@@ -502,41 +353,17 @@ static inline void netbk_free_pages(int nr_frags, struct netbk_rx_meta *meta)
static int netbk_check_gop(int nr_frags, domid_t domid,
struct netrx_pending_operations *npo)
{
- struct multicall_entry *mcl;
- struct gnttab_transfer *gop;
struct gnttab_copy *copy_op;
int status = NETIF_RSP_OKAY;
int i;
for (i = 0; i <= nr_frags; i++) {
- if (npo->meta[npo->meta_cons + i].copy) {
copy_op = npo->copy + npo->copy_cons++;
if (copy_op->status != GNTST_okay) {
DPRINTK("Bad status %d from copy to DOM%d.\n",
copy_op->status, domid);
status = NETIF_RSP_ERROR;
}
- } else {
- if (!xen_feature(XENFEAT_auto_translated_physmap)) {
- mcl = npo->mcl + npo->mcl_cons++;
- /* The update_va_mapping() must not fail. */
- BUG_ON(mcl->result != 0);
- }
-
- gop = npo->trans + npo->trans_cons++;
- /* Check the reassignment error code. */
- if (gop->status != 0) {
- DPRINTK("Bad status %d from grant transfer to DOM%u\n",
- gop->status, domid);
- /*
- * Page no longer belongs to us unless
- * GNTST_bad_page, but that should be
- * a fatal error anyway.
- */
- BUG_ON(gop->status == GNTST_bad_page);
- status = NETIF_RSP_ERROR;
- }
- }
}
return status;
@@ -551,11 +378,8 @@ static void netbk_add_frag_responses(struct xen_netif *netif, int status,
for (i = 0; i < nr_frags; i++) {
int id = meta[i].id;
int flags = (i == nr_frags - 1) ? 0 : NETRXF_more_data;
-
- if (meta[i].copy)
- offset = 0;
- else
- offset = meta[i].frag.page_offset;
+
+ offset = 0;
make_rx_response(netif, id, status, offset,
meta[i].frag.size, flags);
}
@@ -603,18 +427,6 @@ static void net_rx_action(unsigned long unused)
nr_frags = skb_shinfo(skb)->nr_frags;
*(int *)skb->cb = nr_frags;
- if (!xen_feature(XENFEAT_auto_translated_physmap) &&
- !((struct xen_netif *)netdev_priv(skb->dev))->copying_receiver &&
- check_mfn(nr_frags + 1)) {
- /* Memory squeeze? Back off for an arbitrary while. */
- if ( net_ratelimit() )
- WPRINTK("Memory squeeze in netback "
- "driver.\n");
- mod_timer(&net_timer, jiffies + HZ);
- skb_queue_head(&rx_queue, skb);
- break;
- }
-
netbk_gop_skb(skb, &npo);
count += nr_frags + 1;
@@ -677,20 +489,6 @@ static void net_rx_action(unsigned long unused)
nr_frags = *(int *)skb->cb;
netif = netdev_priv(skb->dev);
- /* We can't rely on skb_release_data to release the
- pages used by fragments for us, since it tries to
- touch the pages in the fraglist. If we're in
- flipping mode, that doesn't work. In copying mode,
- we still have access to all of the pages, and so
- it's safe to let release_data deal with it. */
- /* (Freeing the fragments is safe since we copy
- non-linear skbs destined for flipping interfaces) */
- if (!netif->copying_receiver) {
- atomic_set(&(skb_shinfo(skb)->dataref), 1);
- skb_shinfo(skb)->frag_list = NULL;
- skb_shinfo(skb)->nr_frags = 0;
- netbk_free_pages(nr_frags, meta + npo.meta_cons + 1);
- }
netif->stats.tx_bytes += skb->len;
netif->stats.tx_packets++;
@@ -706,10 +504,7 @@ static void net_rx_action(unsigned long unused)
/* remote but checksummed. */
flags |= NETRXF_data_validated;
- if (meta[npo.meta_cons].copy)
- offset = 0;
- else
- offset = offset_in_page(skb->data);
+ offset = 0;
resp = make_rx_response(netif, id, status, offset,
skb_headlen(skb), flags);
diff --git a/drivers/xen/netback/xenbus.c b/drivers/xen/netback/xenbus.c
index 749931e..a492288 100644
--- a/drivers/xen/netback/xenbus.c
+++ b/drivers/xen/netback/xenbus.c
@@ -378,7 +378,8 @@ static int connect_rings(struct backend_info *be)
dev->otherend);
return err;
}
- be->netif->copying_receiver = !!rx_copy;
+ if (!rx_copy)
+ return -EOPNOTSUPP;
if (be->netif->dev->tx_queue_len != 0) {
if (xenbus_scanf(XBT_NIL, dev->otherend,
--
1.7.4
From 17d465234118873ab4f5a7992feb4ce7b5537cf7 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Date: Thu, 19 Mar 2009 15:19:39 -0700
Subject: [PATCH 068/197] xen: netback: demacro MASK_PEND_IDX
Replace it with a more meaningful inline: pending_index().
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
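The helper keeps the macro's masking behaviour, which relies on
MAX_PENDING_REQS being a power of two; a small illustration, with
MAX_PENDING_REQS == 256 assumed:
	/* pending_index(255) == 255, pending_index(256) == 0,            */
	/* pending_index(300) == 44 -- i.e. i & (MAX_PENDING_REQS - 1)    */
	/* behaves like i % MAX_PENDING_REQS for free-running indices.    */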
---
drivers/xen/netback/netback.c | 21 +++++++++++++--------
1 files changed, 13 insertions(+), 8 deletions(-)
diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
index 36bea2b..4095622 100644
--- a/drivers/xen/netback/netback.c
+++ b/drivers/xen/netback/netback.c
@@ -121,7 +121,12 @@ static struct pending_tx_info {
} pending_tx_info[MAX_PENDING_REQS];
static u16 pending_ring[MAX_PENDING_REQS];
typedef unsigned int PEND_RING_IDX;
-#define MASK_PEND_IDX(_i) ((_i)&(MAX_PENDING_REQS-1))
+
+static inline PEND_RING_IDX pending_index(unsigned i)
+{
+ return i & (MAX_PENDING_REQS-1);
+}
+
static PEND_RING_IDX pending_prod, pending_cons;
#define NR_PENDING_REQS (MAX_PENDING_REQS - pending_prod + pending_cons)
@@ -695,7 +700,7 @@ inline static void net_tx_action_dealloc(void)
while (dc != dp) {
unsigned long pfn;
- pending_idx = dealloc_ring[MASK_PEND_IDX(dc++)];
+ pending_idx = dealloc_ring[pending_index(dc++)];
list_move_tail(&pending_inuse[pending_idx].list, &list);
pfn = idx_to_pfn(pending_idx);
@@ -754,7 +759,7 @@ inline static void net_tx_action_dealloc(void)
/* Ready for next use. */
gnttab_reset_grant_page(mmap_pages[pending_idx]);
- pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx;
+ pending_ring[pending_index(pending_prod++)] = pending_idx;
netif_put(netif);
@@ -831,7 +836,7 @@ static struct gnttab_map_grant_ref *netbk_get_requests(struct xen_netif *netif,
start = ((unsigned long)shinfo->frags[0].page == pending_idx);
for (i = start; i < shinfo->nr_frags; i++, txp++) {
- pending_idx = pending_ring[MASK_PEND_IDX(pending_cons++)];
+ pending_idx = pending_ring[pending_index(pending_cons++)];
gnttab_set_map_op(mop++, idx_to_kaddr(pending_idx),
GNTMAP_host_map | GNTMAP_readonly,
@@ -862,7 +867,7 @@ static int netbk_tx_check_mop(struct sk_buff *skb,
if (unlikely(err)) {
txp = &pending_tx_info[pending_idx].req;
make_tx_response(netif, txp, NETIF_RSP_ERROR);
- pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx;
+ pending_ring[pending_index(pending_prod++)] = pending_idx;
netif_put(netif);
} else {
set_phys_to_machine(
@@ -895,7 +900,7 @@ static int netbk_tx_check_mop(struct sk_buff *skb,
/* Error on this fragment: respond to client with an error. */
txp = &pending_tx_info[pending_idx].req;
make_tx_response(netif, txp, NETIF_RSP_ERROR);
- pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx;
+ pending_ring[pending_index(pending_prod++)] = pending_idx;
netif_put(netif);
/* Not the first error? Preceding frags already invalidated. */
@@ -1142,7 +1147,7 @@ static void net_tx_action(unsigned long unused)
continue;
}
- pending_idx = pending_ring[MASK_PEND_IDX(pending_cons)];
+ pending_idx = pending_ring[pending_index(pending_cons)];
data_len = (txreq.size > PKT_PROT_LEN &&
ret < MAX_SKB_FRAGS) ?
@@ -1298,7 +1303,7 @@ static void netif_idx_release(u16 pending_idx)
unsigned long flags;
spin_lock_irqsave(&_lock, flags);
- dealloc_ring[MASK_PEND_IDX(dealloc_prod)] = pending_idx;
+ dealloc_ring[pending_index(dealloc_prod)] = pending_idx;
/* Sync with net_tx_action_dealloc: insert idx /then/ incr producer. */
smp_wmb();
dealloc_prod++;
--
1.7.4
From d47af34f87b2d365c75aa3579ad512619ef3d579 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Date: Thu, 19 Mar 2009 15:29:30 -0700
Subject: [PATCH 069/197] xen: netback: convert PEND_RING_IDX into a proper typedef name
Rename PEND_RING_IDX to pending_ring_idx_t. It's not used that much, so the
extra typing won't kill anyone.
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
drivers/xen/netback/netback.c | 12 ++++++------
1 files changed, 6 insertions(+), 6 deletions(-)
diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
index 4095622..8292e96 100644
--- a/drivers/xen/netback/netback.c
+++ b/drivers/xen/netback/netback.c
@@ -120,19 +120,19 @@ static struct pending_tx_info {
struct xen_netif *netif;
} pending_tx_info[MAX_PENDING_REQS];
static u16 pending_ring[MAX_PENDING_REQS];
-typedef unsigned int PEND_RING_IDX;
+typedef unsigned int pending_ring_idx_t;
-static inline PEND_RING_IDX pending_index(unsigned i)
+static inline pending_ring_idx_t pending_index(unsigned i)
{
return i & (MAX_PENDING_REQS-1);
}
-static PEND_RING_IDX pending_prod, pending_cons;
+static pending_ring_idx_t pending_prod, pending_cons;
#define NR_PENDING_REQS (MAX_PENDING_REQS - pending_prod + pending_cons)
/* Freed TX SKBs get batched on this ring before return to pending_ring. */
static u16 dealloc_ring[MAX_PENDING_REQS];
-static PEND_RING_IDX dealloc_prod, dealloc_cons;
+static pending_ring_idx_t dealloc_prod, dealloc_cons;
/* Doubly-linked list of in-use pending entries. */
static struct netbk_tx_pending_inuse pending_inuse[MAX_PENDING_REQS];
@@ -669,7 +669,7 @@ static void tx_credit_callback(unsigned long data)
netif_schedule_work(netif);
}
-static inline int copy_pending_req(PEND_RING_IDX pending_idx)
+static inline int copy_pending_req(pending_ring_idx_t pending_idx)
{
return gnttab_copy_grant_page(grant_tx_handle[pending_idx],
&mmap_pages[pending_idx]);
@@ -680,7 +680,7 @@ inline static void net_tx_action_dealloc(void)
struct netbk_tx_pending_inuse *inuse, *n;
struct gnttab_unmap_grant_ref *gop;
u16 pending_idx;
- PEND_RING_IDX dc, dp;
+ pending_ring_idx_t dc, dp;
struct xen_netif *netif;
int ret;
LIST_HEAD(list);
--
1.7.4
From 56727a43f329d50c2a00fed0316ffd87d6c23ebd Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Date: Thu, 19 Mar 2009 15:31:32 -0700
Subject: [PATCH 070/197] xen: netback: rename NR_PENDING_REQS to nr_pending_reqs()
Use function syntax to show it's actually computing a value, rather than
a constant.
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
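pending_prod and pending_cons are free-running counters over the ring of free
request slots, so their difference is the number of free slots and the helper
returns the in-flight count; unsigned wraparound keeps the subtraction safe.
A worked example with assumed values:
	/* MAX_PENDING_REQS == 256, pending_prod == 300, pending_cons == 50 */
	/* free slots = 300 - 50 = 250                                      */
	/* in flight  = 256 - 300 + 50 = 6                                  */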
---
drivers/xen/netback/netback.c | 10 +++++++---
1 files changed, 7 insertions(+), 3 deletions(-)
diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
index 8292e96..5410a68 100644
--- a/drivers/xen/netback/netback.c
+++ b/drivers/xen/netback/netback.c
@@ -128,7 +128,11 @@ static inline pending_ring_idx_t pending_index(unsigned i)
}
static pending_ring_idx_t pending_prod, pending_cons;
-#define NR_PENDING_REQS (MAX_PENDING_REQS - pending_prod + pending_cons)
+
+static inline pending_ring_idx_t nr_pending_reqs(void)
+{
+ return MAX_PENDING_REQS - pending_prod + pending_cons;
+}
/* Freed TX SKBs get batched on this ring before return to pending_ring. */
static u16 dealloc_ring[MAX_PENDING_REQS];
@@ -167,7 +171,7 @@ static inline unsigned long alloc_mfn(void)
static inline void maybe_schedule_tx_action(void)
{
smp_mb();
- if ((NR_PENDING_REQS < (MAX_PENDING_REQS/2)) &&
+ if ((nr_pending_reqs() < (MAX_PENDING_REQS/2)) &&
!list_empty(&net_schedule_list))
tasklet_schedule(&net_tx_tasklet);
}
@@ -1060,7 +1064,7 @@ static void net_tx_action(unsigned long unused)
net_tx_action_dealloc();
mop = tx_map_ops;
- while (((NR_PENDING_REQS + MAX_SKB_FRAGS) < MAX_PENDING_REQS) &&
+ while (((nr_pending_reqs() + MAX_SKB_FRAGS) < MAX_PENDING_REQS) &&
!list_empty(&net_schedule_list)) {
/* Get a netif from the list with work to do. */
ent = net_schedule_list.next;
--
1.7.4
From 55b360614f1bd44d0b1395b4aabf41d8f1f13f17 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Date: Thu, 19 Mar 2009 15:45:45 -0700
Subject: [PATCH 071/197] xen: netback: pre-initialize list and spinlocks; use empty list to indicate not on list
Statically pre-initialize net_schedule_list head and lock.
Use an empty list to mark when a xen_netif is not on the schedule list,
rather than NULL (which may upset list debugging).
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
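list_del_init() leaves the removed node pointing at itself, so list_empty()
on the node doubles as a cheap "not on the schedule list" test without poking
NULL into the pointers; a sketch of the idiom (illustrative only):
	list_del_init(&netif->list);		/* unlink and re-initialise  */
	BUG_ON(!list_empty(&netif->list));	/* node now points at itself */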
---
drivers/xen/netback/interface.c | 1 +
drivers/xen/netback/netback.c | 12 ++++--------
2 files changed, 5 insertions(+), 8 deletions(-)
diff --git a/drivers/xen/netback/interface.c b/drivers/xen/netback/interface.c
index 5e0d26d..dc4fb53 100644
--- a/drivers/xen/netback/interface.c
+++ b/drivers/xen/netback/interface.c
@@ -201,6 +201,7 @@ struct xen_netif *netif_alloc(struct device *parent, domid_t domid, unsigned int
atomic_set(&netif->refcnt, 1);
init_waitqueue_head(&netif->waiting_to_free);
netif->dev = dev;
+ INIT_LIST_HEAD(&netif->list);
netback_carrier_off(netif);
diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
index 5410a68..cbd4b03 100644
--- a/drivers/xen/netback/netback.c
+++ b/drivers/xen/netback/netback.c
@@ -148,8 +148,8 @@ static grant_handle_t grant_tx_handle[MAX_PENDING_REQS];
static struct gnttab_unmap_grant_ref tx_unmap_ops[MAX_PENDING_REQS];
static struct gnttab_map_grant_ref tx_map_ops[MAX_PENDING_REQS];
-static struct list_head net_schedule_list;
-static spinlock_t net_schedule_list_lock;
+static LIST_HEAD(net_schedule_list);
+static DEFINE_SPINLOCK(net_schedule_list_lock);
#define MAX_MFN_ALLOC 64
static unsigned long mfn_list[MAX_MFN_ALLOC];
@@ -588,15 +588,14 @@ struct net_device_stats *netif_be_get_stats(struct net_device *dev)
static int __on_net_schedule_list(struct xen_netif *netif)
{
- return netif->list.next != NULL;
+ return !list_empty(&netif->list);
}
static void remove_from_net_schedule_list(struct xen_netif *netif)
{
spin_lock_irq(&net_schedule_list_lock);
if (likely(__on_net_schedule_list(netif))) {
- list_del(&netif->list);
- netif->list.next = NULL;
+ list_del_init(&netif->list);
netif_put(netif);
}
spin_unlock_irq(&net_schedule_list_lock);
@@ -1466,9 +1465,6 @@ static int __init netback_init(void)
for (i = 0; i < MAX_PENDING_REQS; i++)
pending_ring[i] = i;
- spin_lock_init(&net_schedule_list_lock);
- INIT_LIST_HEAD(&net_schedule_list);
-
netbk_copy_skb_mode = NETBK_DONT_COPY_SKB;
if (MODPARM_copy_skb) {
if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_and_replace,
--
1.7.4
From e12cf57de7a6c20e4c8900ce7bf4e6924a12f49e Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Date: Thu, 19 Mar 2009 15:48:10 -0700
Subject: [PATCH 072/197] xen: netback: remove CONFIG_XEN_NETDEV_PIPELINED_TRANSMITTER
Keir says:
> > Does CONFIG_XEN_NETDEV_PIPELINED_TRANSMITTER need to be a config
> > option? Could/should we always/never set it?
> It doesn't work well with local delivery into dom0, nor even with IP
> fragment reassembly. I don't think we would ever turn it on these days.
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
drivers/xen/netback/netback.c | 21 ---------------------
1 files changed, 0 insertions(+), 21 deletions(-)
diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
index cbd4b03..f00e405 100644
--- a/drivers/xen/netback/netback.c
+++ b/drivers/xen/netback/netback.c
@@ -615,23 +615,11 @@ static void add_to_net_schedule_list_tail(struct xen_netif *netif)
spin_unlock_irq(&net_schedule_list_lock);
}
-/*
- * Note on CONFIG_XEN_NETDEV_PIPELINED_TRANSMITTER:
- * If this driver is pipelining transmit requests then we can be very
- * aggressive in avoiding new-packet notifications -- frontend only needs to
- * send a notification if there are no outstanding unreceived responses.
- * If we may be buffer transmit buffers for any reason then we must be rather
- * more conservative and treat this as the final check for pending work.
- */
void netif_schedule_work(struct xen_netif *netif)
{
int more_to_do;
-#ifdef CONFIG_XEN_NETDEV_PIPELINED_TRANSMITTER
- more_to_do = RING_HAS_UNCONSUMED_REQUESTS(&netif->tx);
-#else
RING_FINAL_CHECK_FOR_REQUESTS(&netif->tx, more_to_do);
-#endif
if (more_to_do) {
add_to_net_schedule_list_tail(netif);
@@ -1355,15 +1343,6 @@ static void make_tx_response(struct xen_netif *netif,
RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&netif->tx, notify);
if (notify)
notify_remote_via_irq(netif->irq);
-
-#ifdef CONFIG_XEN_NETDEV_PIPELINED_TRANSMITTER
- if (i == netif->tx.req_cons) {
- int more_to_do;
- RING_FINAL_CHECK_FOR_REQUESTS(&netif->tx, more_to_do);
- if (more_to_do)
- add_to_net_schedule_list_tail(netif);
- }
-#endif
}
static struct xen_netif_rx_response *make_rx_response(struct xen_netif *netif,
--
1.7.4
From adf542f9c714e3b7c76fcf9e44e0a89cae21a341 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Date: Thu, 19 Mar 2009 22:28:52 -0700
Subject: [PATCH 073/197] xen: netback: make netif_get/put inlines
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
drivers/xen/netback/common.h | 16 ++++++++++------
1 files changed, 10 insertions(+), 6 deletions(-)
diff --git a/drivers/xen/netback/common.h b/drivers/xen/netback/common.h
index 06f04c1..9056be0 100644
--- a/drivers/xen/netback/common.h
+++ b/drivers/xen/netback/common.h
@@ -183,12 +183,16 @@ struct xen_netif *netif_alloc(struct device *parent, domid_t domid, unsigned int
int netif_map(struct xen_netif *netif, unsigned long tx_ring_ref,
unsigned long rx_ring_ref, unsigned int evtchn);
-#define netif_get(_b) (atomic_inc(&(_b)->refcnt))
-#define netif_put(_b) \
- do { \
- if ( atomic_dec_and_test(&(_b)->refcnt) ) \
- wake_up(&(_b)->waiting_to_free); \
- } while (0)
+static inline void netif_get(struct xen_netif *netif)
+{
+ atomic_inc(&netif->refcnt);
+}
+
+static inline void netif_put(struct xen_netif *netif)
+{
+ if (atomic_dec_and_test(&netif->refcnt))
+ wake_up(&netif->waiting_to_free);
+}
void netif_xenbus_init(void);
--
1.7.4
From f06459a199f953a68f001f06e54dde54e1e16c87 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Date: Thu, 19 Mar 2009 22:30:24 -0700
Subject: [PATCH 074/197] xen: netback: move code around
Split net_tx_action() into several functions; move variables into
their innermost scopes; rename "i" to "idx".
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
drivers/xen/netback/netback.c | 158 ++++++++++++++++++++++++-----------------
1 files changed, 94 insertions(+), 64 deletions(-)
diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
index f00e405..4d63ff3 100644
--- a/drivers/xen/netback/netback.c
+++ b/drivers/xen/netback/netback.c
@@ -773,7 +773,8 @@ static void netbk_tx_err(struct xen_netif *netif, struct xen_netif_tx_request *t
netif_put(netif);
}
-static int netbk_count_requests(struct xen_netif *netif, struct xen_netif_tx_request *first,
+static int netbk_count_requests(struct xen_netif *netif,
+ struct xen_netif_tx_request *first,
struct xen_netif_tx_request *txp, int work_to_do)
{
RING_IDX cons = netif->tx.req_cons;
@@ -1032,30 +1033,58 @@ out:
return err;
}
-/* Called after netfront has transmitted */
-static void net_tx_action(unsigned long unused)
+static bool tx_credit_exceeded(struct xen_netif *netif, unsigned size)
{
- struct list_head *ent;
- struct sk_buff *skb;
- struct xen_netif *netif;
- struct xen_netif_tx_request txreq;
- struct xen_netif_tx_request txfrags[MAX_SKB_FRAGS];
- struct xen_netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX - 1];
- u16 pending_idx;
- RING_IDX i;
- struct gnttab_map_grant_ref *mop;
- unsigned int data_len;
- int ret, work_to_do;
+ unsigned long now = jiffies;
+ unsigned long next_credit =
+ netif->credit_timeout.expires +
+ msecs_to_jiffies(netif->credit_usec / 1000);
+
+ /* Timer could already be pending in rare cases. */
+ if (timer_pending(&netif->credit_timeout))
+ return true;
+
+ /* Passed the point where we can replenish credit? */
+ if (time_after_eq(now, next_credit)) {
+ netif->credit_timeout.expires = now;
+ tx_add_credit(netif);
+ }
- if (dealloc_cons != dealloc_prod)
- net_tx_action_dealloc();
+ /* Still too big to send right now? Set a callback. */
+ if (size > netif->remaining_credit) {
+ netif->credit_timeout.data =
+ (unsigned long)netif;
+ netif->credit_timeout.function =
+ tx_credit_callback;
+ mod_timer(&netif->credit_timeout,
+ next_credit);
+
+ return true;
+ }
+
+ return false;
+}
+
+static unsigned net_tx_build_mops(void)
+{
+ struct gnttab_map_grant_ref *mop;
+ struct sk_buff *skb;
+ int ret;
mop = tx_map_ops;
while (((nr_pending_reqs() + MAX_SKB_FRAGS) < MAX_PENDING_REQS) &&
!list_empty(&net_schedule_list)) {
+ struct xen_netif *netif;
+ struct xen_netif_tx_request txreq;
+ struct xen_netif_tx_request txfrags[MAX_SKB_FRAGS];
+ struct xen_netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX - 1];
+ u16 pending_idx;
+ RING_IDX idx;
+ int work_to_do;
+ unsigned int data_len;
+
/* Get a netif from the list with work to do. */
- ent = net_schedule_list.next;
- netif = list_entry(ent, struct xen_netif, list);
+ netif = list_first_entry(&net_schedule_list, struct xen_netif, list);
netif_get(netif);
remove_from_net_schedule_list(netif);
@@ -1065,67 +1094,43 @@ static void net_tx_action(unsigned long unused)
continue;
}
- i = netif->tx.req_cons;
+ idx = netif->tx.req_cons;
rmb(); /* Ensure that we see the request before we copy it. */
- memcpy(&txreq, RING_GET_REQUEST(&netif->tx, i), sizeof(txreq));
+ memcpy(&txreq, RING_GET_REQUEST(&netif->tx, idx), sizeof(txreq));
/* Credit-based scheduling. */
- if (txreq.size > netif->remaining_credit) {
- unsigned long now = jiffies;
- unsigned long next_credit =
- netif->credit_timeout.expires +
- msecs_to_jiffies(netif->credit_usec / 1000);
-
- /* Timer could already be pending in rare cases. */
- if (timer_pending(&netif->credit_timeout)) {
- netif_put(netif);
- continue;
- }
-
- /* Passed the point where we can replenish credit? */
- if (time_after_eq(now, next_credit)) {
- netif->credit_timeout.expires = now;
- tx_add_credit(netif);
- }
-
- /* Still too big to send right now? Set a callback. */
- if (txreq.size > netif->remaining_credit) {
- netif->credit_timeout.data =
- (unsigned long)netif;
- netif->credit_timeout.function =
- tx_credit_callback;
- mod_timer(&netif->credit_timeout,
- next_credit);
- netif_put(netif);
- continue;
- }
+ if (txreq.size > netif->remaining_credit &&
+ tx_credit_exceeded(netif, txreq.size)) {
+ netif_put(netif);
+ continue;
}
+
netif->remaining_credit -= txreq.size;
work_to_do--;
- netif->tx.req_cons = ++i;
+ netif->tx.req_cons = ++idx;
memset(extras, 0, sizeof(extras));
if (txreq.flags & NETTXF_extra_info) {
work_to_do = netbk_get_extras(netif, extras,
work_to_do);
- i = netif->tx.req_cons;
+ idx = netif->tx.req_cons;
if (unlikely(work_to_do < 0)) {
- netbk_tx_err(netif, &txreq, i);
+ netbk_tx_err(netif, &txreq, idx);
continue;
}
}
ret = netbk_count_requests(netif, &txreq, txfrags, work_to_do);
if (unlikely(ret < 0)) {
- netbk_tx_err(netif, &txreq, i - ret);
+ netbk_tx_err(netif, &txreq, idx - ret);
continue;
}
- i += ret;
+ idx += ret;
if (unlikely(txreq.size < ETH_HLEN)) {
DPRINTK("Bad packet size: %d\n", txreq.size);
- netbk_tx_err(netif, &txreq, i);
+ netbk_tx_err(netif, &txreq, idx);
continue;
}
@@ -1134,7 +1139,7 @@ static void net_tx_action(unsigned long unused)
DPRINTK("txreq.offset: %x, size: %u, end: %lu\n",
txreq.offset, txreq.size,
(txreq.offset &~PAGE_MASK) + txreq.size);
- netbk_tx_err(netif, &txreq, i);
+ netbk_tx_err(netif, &txreq, idx);
continue;
}
@@ -1148,7 +1153,7 @@ static void net_tx_action(unsigned long unused)
GFP_ATOMIC | __GFP_NOWARN);
if (unlikely(skb == NULL)) {
DPRINTK("Can't allocate a skb in start_xmit.\n");
- netbk_tx_err(netif, &txreq, i);
+ netbk_tx_err(netif, &txreq, idx);
break;
}
@@ -1161,7 +1166,7 @@ static void net_tx_action(unsigned long unused)
if (netbk_set_skb_gso(skb, gso)) {
kfree_skb(skb);
- netbk_tx_err(netif, &txreq, i);
+ netbk_tx_err(netif, &txreq, idx);
continue;
}
}
@@ -1199,23 +1204,27 @@ static void net_tx_action(unsigned long unused)
mop = netbk_get_requests(netif, skb, txfrags, mop);
- netif->tx.req_cons = i;
+ netif->tx.req_cons = idx;
netif_schedule_work(netif);
if ((mop - tx_map_ops) >= ARRAY_SIZE(tx_map_ops))
break;
}
- if (mop == tx_map_ops)
- return;
+ return mop - tx_map_ops;
+}
- ret = HYPERVISOR_grant_table_op(
- GNTTABOP_map_grant_ref, tx_map_ops, mop - tx_map_ops);
- BUG_ON(ret);
+static void net_tx_submit(void)
+{
+ struct gnttab_map_grant_ref *mop;
+ struct sk_buff *skb;
mop = tx_map_ops;
while ((skb = __skb_dequeue(&tx_queue)) != NULL) {
struct xen_netif_tx_request *txp;
+ struct xen_netif *netif;
+ u16 pending_idx;
+ unsigned data_len;
pending_idx = *((u16 *)skb->data);
netif = pending_tx_info[pending_idx].netif;
@@ -1288,6 +1297,27 @@ static void net_tx_action(unsigned long unused)
}
}
+/* Called after netfront has transmitted */
+static void net_tx_action(unsigned long unused)
+{
+ unsigned nr_mops;
+ int ret;
+
+ if (dealloc_cons != dealloc_prod)
+ net_tx_action_dealloc();
+
+ nr_mops = net_tx_build_mops();
+
+ if (nr_mops == 0)
+ return;
+
+ ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref,
+ tx_map_ops, nr_mops);
+ BUG_ON(ret);
+
+ net_tx_submit();
+}
+
static void netif_idx_release(u16 pending_idx)
{
static DEFINE_SPINLOCK(_lock);
--
1.7.4
From cec84ff81d9f6ca882908572b984215529b4117b Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Date: Fri, 20 Mar 2009 23:18:12 -0700
Subject: [PATCH 075/197] xen: netback: document PKT_PROT_LEN
Document the rationale for the existence and value of PKT_PROT_LEN.
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
drivers/xen/netback/netback.c | 9 +++++++++
1 files changed, 9 insertions(+), 0 deletions(-)
diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
index 4d63ff3..80b424f 100644
--- a/drivers/xen/netback/netback.c
+++ b/drivers/xen/netback/netback.c
@@ -113,6 +113,15 @@ static inline int netif_page_index(struct page *pg)
return idx;
}
+/*
+ * This is the amount of packet we copy rather than map, so that the
+ * guest can't fiddle with the contents of the headers while we do
+ * packet processing on them (netfilter, routing, etc). This could
+ * probably do with being larger, since 1) 64-bytes isn't necessarily
+ * long enough to cover a full christmas-tree ip+tcp header, let alone
+ * packet contents, and 2) the data is probably in cache anyway
+ * (though perhaps some other cpu's cache).
+ */
#define PKT_PROT_LEN 64
static struct pending_tx_info {
--
1.7.4
From a9402ee935757e8facebc6e886f9912c2c523da7 Mon Sep 17 00:00:00 2001
From: Christophe Saout <chtephan@leto.intern.saout.de>
Date: Sun, 12 Apr 2009 13:40:27 +0200
Subject: [PATCH 076/197] xen: netback: use dev_name() instead of removed ->bus_id.
Signed-off-by: Christophe Saout <chtephan@leto.intern.saout.de>
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
drivers/xen/netback/interface.c | 2 +-
1 files changed, 1 insertions(+), 1 deletions(-)
diff --git a/drivers/xen/netback/interface.c b/drivers/xen/netback/interface.c
index dc4fb53..3bb5c20 100644
--- a/drivers/xen/netback/interface.c
+++ b/drivers/xen/netback/interface.c
@@ -123,7 +123,7 @@ static void netbk_get_drvinfo(struct net_device *dev,
struct ethtool_drvinfo *info)
{
strcpy(info->driver, "netbk");
- strcpy(info->bus_info, dev->dev.parent->bus_id);
+ strcpy(info->bus_info, dev_name(dev->dev.parent));
}
static const struct netif_stat {
--
1.7.4
From 35de1701fca19d693e9722bffbe7609caf1d5ac6 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Date: Mon, 29 Jun 2009 14:04:23 -0700
Subject: [PATCH 077/197] xen: netback: convert to net_device_ops
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
drivers/xen/netback/interface.c | 17 +++++++++++------
1 files changed, 11 insertions(+), 6 deletions(-)
diff --git a/drivers/xen/netback/interface.c b/drivers/xen/netback/interface.c
index 3bb5c20..21c1f95 100644
--- a/drivers/xen/netback/interface.c
+++ b/drivers/xen/netback/interface.c
@@ -178,6 +178,15 @@ static struct ethtool_ops network_ethtool_ops =
.get_strings = netbk_get_strings,
};
+static struct net_device_ops netback_ops =
+{
+ .ndo_start_xmit = netif_be_start_xmit,
+ .ndo_get_stats = netif_be_get_stats,
+ .ndo_open = net_open,
+ .ndo_stop = net_close,
+ .ndo_change_mtu = netbk_change_mtu,
+};
+
struct xen_netif *netif_alloc(struct device *parent, domid_t domid, unsigned int handle)
{
int err = 0;
@@ -213,12 +222,8 @@ struct xen_netif *netif_alloc(struct device *parent, domid_t domid, unsigned int
init_timer(&netif->tx_queue_timeout);
- dev->hard_start_xmit = netif_be_start_xmit;
- dev->get_stats = netif_be_get_stats;
- dev->open = net_open;
- dev->stop = net_close;
- dev->change_mtu = netbk_change_mtu;
- dev->features = NETIF_F_IP_CSUM;
+ dev->netdev_ops = &netback_ops;
+ dev->features = NETIF_F_IP_CSUM;
SET_ETHTOOL_OPS(dev, &network_ethtool_ops);
--
1.7.4
From c6f3885ef05e96489025e1c1c7299aac7cf43d87 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Date: Fri, 4 Sep 2009 14:55:43 -0700
Subject: [PATCH 078/197] xen: netback: reinstate missing code
Change c3219dc868fe3e84070d6da2d0759a834b6f7251 ("Completely drop flip
support") was a bit too aggressive in removing code, and removed a chunk
which was used not only for flip but also whenever a buffer crossed a page
boundary.
Reinstate that code.
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
drivers/xen/netback/netback.c | 91 +++++++++++++++++++++++++++++++++++++++++
1 files changed, 91 insertions(+), 0 deletions(-)
diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
index 80b424f..7c0f05b 100644
--- a/drivers/xen/netback/netback.c
+++ b/drivers/xen/netback/netback.c
@@ -185,6 +185,82 @@ static inline void maybe_schedule_tx_action(void)
tasklet_schedule(&net_tx_tasklet);
}
+static struct sk_buff *netbk_copy_skb(struct sk_buff *skb)
+{
+ struct skb_shared_info *ninfo;
+ struct sk_buff *nskb;
+ unsigned long offset;
+ int ret;
+ int len;
+ int headlen;
+
+ BUG_ON(skb_shinfo(skb)->frag_list != NULL);
+
+ nskb = alloc_skb(SKB_MAX_HEAD(0), GFP_ATOMIC | __GFP_NOWARN);
+ if (unlikely(!nskb))
+ goto err;
+
+ skb_reserve(nskb, NET_SKB_PAD + NET_IP_ALIGN);
+ headlen = skb_end_pointer(nskb) - nskb->data;
+ if (headlen > skb_headlen(skb))
+ headlen = skb_headlen(skb);
+ ret = skb_copy_bits(skb, 0, __skb_put(nskb, headlen), headlen);
+ BUG_ON(ret);
+
+ ninfo = skb_shinfo(nskb);
+ ninfo->gso_size = skb_shinfo(skb)->gso_size;
+ ninfo->gso_type = skb_shinfo(skb)->gso_type;
+
+ offset = headlen;
+ len = skb->len - headlen;
+
+ nskb->len = skb->len;
+ nskb->data_len = len;
+ nskb->truesize += len;
+
+ while (len) {
+ struct page *page;
+ int copy;
+ int zero;
+
+ if (unlikely(ninfo->nr_frags >= MAX_SKB_FRAGS)) {
+ dump_stack();
+ goto err_free;
+ }
+
+ copy = len >= PAGE_SIZE ? PAGE_SIZE : len;
+ zero = len >= PAGE_SIZE ? 0 : __GFP_ZERO;
+
+ page = alloc_page(GFP_ATOMIC | __GFP_NOWARN | zero);
+ if (unlikely(!page))
+ goto err_free;
+
+ ret = skb_copy_bits(skb, offset, page_address(page), copy);
+ BUG_ON(ret);
+
+ ninfo->frags[ninfo->nr_frags].page = page;
+ ninfo->frags[ninfo->nr_frags].page_offset = 0;
+ ninfo->frags[ninfo->nr_frags].size = copy;
+ ninfo->nr_frags++;
+
+ offset += copy;
+ len -= copy;
+ }
+
+ offset = nskb->data - skb->data;
+
+ nskb->transport_header = skb->transport_header + offset;
+ nskb->network_header = skb->network_header + offset;
+ nskb->mac_header = skb->mac_header + offset;
+
+ return nskb;
+
+ err_free:
+ kfree_skb(nskb);
+ err:
+ return NULL;
+}
+
static inline int netbk_max_required_rx_slots(struct xen_netif *netif)
{
if (netif->features & (NETIF_F_SG|NETIF_F_TSO))
@@ -218,6 +294,21 @@ int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev)
if (unlikely(!netif_schedulable(netif) || netbk_queue_full(netif)))
goto drop;
+ /*
+ * XXX For now we also copy skbuffs whose head crosses a page
+ * boundary, because netbk_gop_skb can't handle them.
+ */
+ if ((skb_headlen(skb) + offset_in_page(skb->data)) >= PAGE_SIZE) {
+ struct sk_buff *nskb = netbk_copy_skb(skb);
+ if ( unlikely(nskb == NULL) )
+ goto drop;
+ /* Copy only the header fields we use in this driver. */
+ nskb->dev = skb->dev;
+ nskb->ip_summed = skb->ip_summed;
+ dev_kfree_skb(skb);
+ skb = nskb;
+ }
+
netif->rx_req_cons_peek += skb_shinfo(skb)->nr_frags + 1 +
!!skb_shinfo(skb)->gso_size;
netif_get(netif);
--
1.7.4
From 2e290d790877df4368691180f76206ad27a42505 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Date: Wed, 9 Sep 2009 15:19:15 -0700
Subject: [PATCH 079/197] xen: netback: remove debug noise
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
drivers/xen/netback/netback.c | 2 --
1 files changed, 0 insertions(+), 2 deletions(-)
diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
index 7c0f05b..d7d738e 100644
--- a/drivers/xen/netback/netback.c
+++ b/drivers/xen/netback/netback.c
@@ -1537,8 +1537,6 @@ static int __init netback_init(void)
int i;
struct page *page;
- printk(KERN_CRIT "*** netif_init\n");
-
if (!xen_domain())
return -ENODEV;
--
1.7.4
From 3ba3bb7d563704c3050de6116aa0a761a5791428 Mon Sep 17 00:00:00 2001
From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Date: Thu, 8 Oct 2009 13:23:09 -0400
Subject: [PATCH 080/197] Fix compile warnings: ignoring return value of 'xenbus_register_backend' ..
We neglect to check the return value of xenbus_register_backend
and to take action when it fails. This patch fixes that and adds
code to deal with that type of failure.
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
drivers/xen/netback/common.h | 2 +-
drivers/xen/netback/netback.c | 12 +++++++++++-
drivers/xen/netback/xenbus.c | 4 ++--
3 files changed, 14 insertions(+), 4 deletions(-)
diff --git a/drivers/xen/netback/common.h b/drivers/xen/netback/common.h
index 9056be0..0675946 100644
--- a/drivers/xen/netback/common.h
+++ b/drivers/xen/netback/common.h
@@ -194,7 +194,7 @@ static inline void netif_put(struct xen_netif *netif)
wake_up(&netif->waiting_to_free);
}
-void netif_xenbus_init(void);
+int netif_xenbus_init(void);
#define netif_schedulable(netif) \
(netif_running((netif)->dev) && netback_carrier_ok(netif))
diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
index d7d738e..860c61e 100644
--- a/drivers/xen/netback/netback.c
+++ b/drivers/xen/netback/netback.c
@@ -1536,6 +1536,7 @@ static int __init netback_init(void)
{
int i;
struct page *page;
+ int rc = 0;
if (!xen_domain())
return -ENODEV;
@@ -1583,7 +1584,9 @@ static int __init netback_init(void)
//netif_accel_init();
- netif_xenbus_init();
+ rc = netif_xenbus_init();
+ if (rc)
+ goto failed_init;
#ifdef NETBE_DEBUG_INTERRUPT
(void)bind_virq_to_irqhandler(VIRQ_DEBUG,
@@ -1595,6 +1598,13 @@ static int __init netback_init(void)
#endif
return 0;
+
+failed_init:
+ free_empty_pages_and_pagevec(mmap_pages, MAX_PENDING_REQS);
+ del_timer(&netbk_tx_pending_timer);
+ del_timer(&net_timer);
+ return rc;
+
}
module_init(netback_init);
diff --git a/drivers/xen/netback/xenbus.c b/drivers/xen/netback/xenbus.c
index a492288..c46b235 100644
--- a/drivers/xen/netback/xenbus.c
+++ b/drivers/xen/netback/xenbus.c
@@ -447,8 +447,8 @@ static struct xenbus_driver netback = {
};
-void netif_xenbus_init(void)
+int netif_xenbus_init(void)
{
printk(KERN_CRIT "registering netback\n");
- (void)xenbus_register_backend(&netback);
+ return xenbus_register_backend(&netback);
}
--
1.7.4
From 4bc919e07d5dc48cb95b22cc6e90c6110c229343 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Date: Tue, 27 Oct 2009 12:37:50 -0700
Subject: [PATCH 081/197] xen: netback: don't screw around with packet gso state
These lines were reverted from 2.6.18 netback as the network stack
was modified to deal with packets shorter than the gso size, so there's
no need to fiddle with the gso state in netback.
Taken from linux-2.6.18-xen.hg change 8081d19dce89
Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
drivers/xen/netback/netback.c | 5 -----
1 files changed, 0 insertions(+), 5 deletions(-)
diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
index 860c61e..9a14976 100644
--- a/drivers/xen/netback/netback.c
+++ b/drivers/xen/netback/netback.c
@@ -1293,11 +1293,6 @@ static unsigned net_tx_build_mops(void)
skb_shinfo(skb)->frags[0].page = (void *)~0UL;
}
- if (skb->data_len < skb_shinfo(skb)->gso_size) {
- skb_shinfo(skb)->gso_size = 0;
- skb_shinfo(skb)->gso_type = 0;
- }
-
__skb_queue_tail(&tx_queue, skb);
pending_cons++;
--
1.7.4
From f2b947783c47a721497e5d325c736234f71501e7 Mon Sep 17 00:00:00 2001
From: Steven Smith <ssmith@weybridge.uk.xensource.com>
Date: Fri, 30 Oct 2009 13:55:23 -0700
Subject: [PATCH 082/197] xen: netback: make sure that pg->mapping is never NULL for a page mapped from a foreign domain.
Otherwise, the foreign maps tracking infrastructure gets confused, and
thinks that the foreign page is local. This means that you can't
forward that packet to another foreign domain. This leads to very
high packet drop, and hence very poor performance.
Signed-off-by: Steven Smith <steven.smith@citrix.com>
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
drivers/xen/netback/netback.c | 4 ++--
1 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
index 9a14976..111fec7 100644
--- a/drivers/xen/netback/netback.c
+++ b/drivers/xen/netback/netback.c
@@ -97,12 +97,12 @@ static inline unsigned long idx_to_kaddr(unsigned int idx)
/* extra field used in struct page */
static inline void netif_set_page_index(struct page *pg, unsigned int index)
{
- *(unsigned long *)&pg->mapping = index;
+ *(unsigned long *)&pg->mapping = index + 1;
}
static inline int netif_page_index(struct page *pg)
{
- unsigned long idx = (unsigned long)pg->mapping;
+ unsigned long idx = (unsigned long)pg->mapping - 1;
if (!PageForeign(pg))
return -1;
--
1.7.4
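(Aside, not part of the patch series: a tiny userspace C sketch of the encoding patch 082 above uses, storing index+1 in an otherwise-unused pointer-sized field so that a foreign-mapped page never ends up with a NULL ->mapping, and decoding subtracts the 1 back off. struct fake_page and the helper names are illustrative stand-ins, not the driver's types.)

#include <assert.h>
#include <stddef.h>

struct fake_page { void *mapping; };

/* Encode: index 0 becomes 1, so the field is never NULL once set. */
static void set_index(struct fake_page *pg, unsigned int index)
{
	pg->mapping = (void *)(unsigned long)(index + 1);
}

/* Decode: undo the +1 offset. */
static int get_index(const struct fake_page *pg)
{
	return (int)((unsigned long)pg->mapping - 1);
}

int main(void)
{
	struct fake_page pg = { .mapping = NULL };

	set_index(&pg, 0);		/* index 0 no longer looks like NULL */
	assert(pg.mapping != NULL);	/* foreign-map tracking stays sane */
	assert(get_index(&pg) == 0);
	return 0;
}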
From df8b27ea0fb2695842104e06caaecb55780577a7 Mon Sep 17 00:00:00 2001
From: Ian Campbell <ijc@hellion.org.uk>
Date: Thu, 3 Dec 2009 21:56:19 +0000
Subject: [PATCH 083/197] xen: rename netbk module to xen-netback.
netbk is rather generic for a modular distro style kernel.
Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
Cc: Jeremy Fitzhardinge <jeremy@goop.org>
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
drivers/xen/netback/Makefile | 4 ++--
1 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/drivers/xen/netback/Makefile b/drivers/xen/netback/Makefile
index a01a1a3..e346e81 100644
--- a/drivers/xen/netback/Makefile
+++ b/drivers/xen/netback/Makefile
@@ -1,3 +1,3 @@
-obj-$(CONFIG_XEN_NETDEV_BACKEND) := netbk.o
+obj-$(CONFIG_XEN_NETDEV_BACKEND) := xen-netback.o
-netbk-y := netback.o xenbus.o interface.o
+xen-netback-y := netback.o xenbus.o interface.o
--
1.7.4
From 279097395ad64ae4df15e206a487cd5fd3be39a8 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Date: Tue, 16 Feb 2010 14:40:37 -0800
Subject: [PATCH 084/197] xen: netback: use dev_get/set_drvdata() interface
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
drivers/xen/netback/xenbus.c | 10 +++++-----
1 files changed, 5 insertions(+), 5 deletions(-)
diff --git a/drivers/xen/netback/xenbus.c b/drivers/xen/netback/xenbus.c
index c46b235..79e6fb0 100644
--- a/drivers/xen/netback/xenbus.c
+++ b/drivers/xen/netback/xenbus.c
@@ -35,7 +35,7 @@ static void backend_create_netif(struct backend_info *be);
static int netback_remove(struct xenbus_device *dev)
{
- struct backend_info *be = dev->dev.driver_data;
+ struct backend_info *be = dev_get_drvdata(&dev->dev);
//netback_remove_accelerators(be, dev);
@@ -45,7 +45,7 @@ static int netback_remove(struct xenbus_device *dev)
be->netif = NULL;
}
kfree(be);
- dev->dev.driver_data = NULL;
+ dev_set_drvdata(&dev->dev, NULL);
return 0;
}
@@ -70,7 +70,7 @@ static int netback_probe(struct xenbus_device *dev,
}
be->dev = dev;
- dev->dev.driver_data = be;
+ dev_set_drvdata(&dev->dev, be);
sg = 1;
if (netbk_copy_skb_mode == NETBK_ALWAYS_COPY_SKB)
@@ -151,7 +151,7 @@ fail:
*/
static int netback_uevent(struct xenbus_device *xdev, struct kobj_uevent_env *env)
{
- struct backend_info *be = xdev->dev.driver_data;
+ struct backend_info *be = dev_get_drvdata(&xdev->dev);
struct xen_netif *netif = be->netif;
char *val;
@@ -211,7 +211,7 @@ static void backend_create_netif(struct backend_info *be)
static void frontend_changed(struct xenbus_device *dev,
enum xenbus_state frontend_state)
{
- struct backend_info *be = dev->dev.driver_data;
+ struct backend_info *be = dev_get_drvdata(&dev->dev);
DPRINTK("%s", xenbus_strstate(frontend_state));
--
1.7.4
From 31d0b5f5763faf607e32f3b5a0f6b37a34bbbf09 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Date: Tue, 16 Feb 2010 14:41:12 -0800
Subject: [PATCH 085/197] xen: netback: include linux/sched.h for TASK_* definitions
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
drivers/xen/netback/common.h | 2 ++
1 files changed, 2 insertions(+), 0 deletions(-)
diff --git a/drivers/xen/netback/common.h b/drivers/xen/netback/common.h
index 0675946..d8653d3 100644
--- a/drivers/xen/netback/common.h
+++ b/drivers/xen/netback/common.h
@@ -38,6 +38,8 @@
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/wait.h>
+#include <linux/sched.h>
+
#include <xen/interface/io/netif.h>
#include <asm/io.h>
#include <asm/pgalloc.h>
--
1.7.4
From cdefc88924b3cdfcac64be737a00a4ec5593cfd5 Mon Sep 17 00:00:00 2001
From: Ian Campbell <ian.campbell@citrix.com>
Date: Tue, 23 Feb 2010 11:52:27 +0000
Subject: [PATCH 086/197] xen: netback: remove unused xen_network_done code
It has been disabled effectively forever.
Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
---
drivers/xen/netback/netback.c | 23 -----------------------
1 files changed, 0 insertions(+), 23 deletions(-)
diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
index 111fec7..4b24893 100644
--- a/drivers/xen/netback/netback.c
+++ b/drivers/xen/netback/netback.c
@@ -343,25 +343,6 @@ int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev)
return 0;
}
-#if 0
-static void xen_network_done_notify(void)
-{
- static struct net_device *eth0_dev = NULL;
- if (unlikely(eth0_dev == NULL))
- eth0_dev = __dev_get_by_name("eth0");
- netif_rx_schedule(eth0_dev);
-}
-/*
- * Add following to poll() function in NAPI driver (Tigon3 is example):
- * if ( xen_network_done() )
- * tg3_enable_ints(tp);
- */
-int xen_network_done(void)
-{
- return skb_queue_empty(&rx_queue);
-}
-#endif
-
struct netrx_pending_operations {
unsigned trans_prod, trans_cons;
unsigned mmu_prod, mmu_mcl;
@@ -664,10 +645,6 @@ static void net_rx_action(unsigned long unused)
/* More work to do? */
if (!skb_queue_empty(&rx_queue) && !timer_pending(&net_timer))
tasklet_schedule(&net_rx_tasklet);
-#if 0
- else
- xen_network_done_notify();
-#endif
}
static void net_alarm(unsigned long unused)
--
1.7.4
From 994be068dd9947cedcee69a7185e54738cda33d4 Mon Sep 17 00:00:00 2001
From: Ian Campbell <ian.campbell@citrix.com>
Date: Tue, 23 Feb 2010 11:58:26 +0000
Subject: [PATCH 087/197] xen: netback: factor disconnect from backend into new function.
Makes subsequent patches cleaner.
Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
---
drivers/xen/netback/xenbus.c | 16 ++++++++++++----
1 files changed, 12 insertions(+), 4 deletions(-)
diff --git a/drivers/xen/netback/xenbus.c b/drivers/xen/netback/xenbus.c
index 79e6fb0..1f36b4d4 100644
--- a/drivers/xen/netback/xenbus.c
+++ b/drivers/xen/netback/xenbus.c
@@ -205,6 +205,16 @@ static void backend_create_netif(struct backend_info *be)
}
+static void disconnect_backend(struct xenbus_device *dev)
+{
+ struct backend_info *be = dev_get_drvdata(&dev->dev);
+
+ if (be->netif) {
+ netif_disconnect(be->netif);
+ be->netif = NULL;
+ }
+}
+
/**
* Callback received when the frontend's state changes.
*/
@@ -238,11 +248,9 @@ static void frontend_changed(struct xenbus_device *dev,
break;
case XenbusStateClosing:
- if (be->netif) {
+ if (be->netif)
kobject_uevent(&dev->dev.kobj, KOBJ_OFFLINE);
- netif_disconnect(be->netif);
- be->netif = NULL;
- }
+ disconnect_backend(dev);
xenbus_switch_state(dev, XenbusStateClosing);
break;
--
1.7.4
From 9dcb4c18e5b29d8862cd7783d5b0040913010563 Mon Sep 17 00:00:00 2001
From: Ian Campbell <ian.campbell@citrix.com>
Date: Tue, 23 Feb 2010 12:10:24 +0000
Subject: [PATCH 088/197] xen: netback: wait for hotplug scripts to complete before signalling connected to frontend
Avoid the situation where the frontend is sending packets but the
domain 0 bridging (or whatever) is not yet configured (because the
hotplug scripts are too slow) and so packets get dropped.
Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
Cc: Steven.Smith@citrix.com
---
drivers/xen/netback/common.h | 2 +
drivers/xen/netback/xenbus.c | 45 +++++++++++++++++++++++++++++++++++++++++-
2 files changed, 46 insertions(+), 1 deletions(-)
diff --git a/drivers/xen/netback/common.h b/drivers/xen/netback/common.h
index d8653d3..1983768 100644
--- a/drivers/xen/netback/common.h
+++ b/drivers/xen/netback/common.h
@@ -145,6 +145,8 @@ struct backend_info {
struct xenbus_device *dev;
struct xen_netif *netif;
enum xenbus_state frontend_state;
+ struct xenbus_watch hotplug_status_watch;
+ int have_hotplug_status_watch:1;
/* State relating to the netback accelerator */
void *netback_accel_priv;
diff --git a/drivers/xen/netback/xenbus.c b/drivers/xen/netback/xenbus.c
index 1f36b4d4..d2407cc 100644
--- a/drivers/xen/netback/xenbus.c
+++ b/drivers/xen/netback/xenbus.c
@@ -32,6 +32,7 @@
static int connect_rings(struct backend_info *);
static void connect(struct backend_info *);
static void backend_create_netif(struct backend_info *be);
+static void unregister_hotplug_status_watch(struct backend_info *be);
static int netback_remove(struct xenbus_device *dev)
{
@@ -39,8 +40,10 @@ static int netback_remove(struct xenbus_device *dev)
//netback_remove_accelerators(be, dev);
+ unregister_hotplug_status_watch(be);
if (be->netif) {
kobject_uevent(&dev->dev.kobj, KOBJ_OFFLINE);
+ xenbus_rm(XBT_NIL, dev->nodename, "hotplug-status");
netif_disconnect(be->netif);
be->netif = NULL;
}
@@ -210,6 +213,7 @@ static void disconnect_backend(struct xenbus_device *dev)
struct backend_info *be = dev_get_drvdata(&dev->dev);
if (be->netif) {
+ xenbus_rm(XBT_NIL, dev->nodename, "hotplug-status");
netif_disconnect(be->netif);
be->netif = NULL;
}
@@ -329,6 +333,36 @@ static int xen_net_read_mac(struct xenbus_device *dev, u8 mac[])
return 0;
}
+static void unregister_hotplug_status_watch(struct backend_info *be)
+{
+ if (be->have_hotplug_status_watch) {
+ unregister_xenbus_watch(&be->hotplug_status_watch);
+ kfree(be->hotplug_status_watch.node);
+ }
+ be->have_hotplug_status_watch = 0;
+}
+
+static void hotplug_status_changed(struct xenbus_watch *watch,
+ const char **vec,
+ unsigned int vec_size)
+{
+ struct backend_info *be = container_of(watch,
+ struct backend_info,
+ hotplug_status_watch);
+ char *str;
+ unsigned int len;
+
+ str = xenbus_read(XBT_NIL, be->dev->nodename, "hotplug-status", &len);
+ if (IS_ERR(str))
+ return;
+ if (len == sizeof("connected")-1 && !memcmp(str, "connected", len)) {
+ xenbus_switch_state(be->dev, XenbusStateConnected);
+ /* Not interested in this watch anymore. */
+ unregister_hotplug_status_watch(be);
+ }
+ kfree(str);
+}
+
static void connect(struct backend_info *be)
{
int err;
@@ -348,7 +382,16 @@ static void connect(struct backend_info *be)
&be->netif->credit_usec);
be->netif->remaining_credit = be->netif->credit_bytes;
- xenbus_switch_state(dev, XenbusStateConnected);
+ unregister_hotplug_status_watch(be);
+ err = xenbus_watch_pathfmt(dev, &be->hotplug_status_watch,
+ hotplug_status_changed,
+ "%s/%s", dev->nodename, "hotplug-status");
+ if (err) {
+ /* Switch now, since we can't do a watch. */
+ xenbus_switch_state(dev, XenbusStateConnected);
+ } else {
+ be->have_hotplug_status_watch = 1;
+ }
netif_wake_queue(be->netif->dev);
}
--
1.7.4
From 509cc7f20f866277a8f5d5895bb266b5b68aac6d Mon Sep 17 00:00:00 2001
From: Ian Campbell <ian.campbell@citrix.com>
Date: Tue, 23 Feb 2010 12:11:51 +0000
Subject: [PATCH 089/197] xen: netback: Always pull through PKT_PROT_LEN bytes into the linear part of an skb.
Previously PKT_PROT_LEN would only have an effect on the first fragment.
Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
---
drivers/xen/netback/netback.c | 10 ++++++++++
1 files changed, 10 insertions(+), 0 deletions(-)
diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
index 4b24893..d4a7a56 100644
--- a/drivers/xen/netback/netback.c
+++ b/drivers/xen/netback/netback.c
@@ -1334,6 +1334,16 @@ static void net_tx_submit(void)
netbk_fill_frags(skb);
+ /*
+ * If the initial fragment was < PKT_PROT_LEN then
+ * pull through some bytes from the other fragments to
+ * increase the linear region to PKT_PROT_LEN bytes.
+ */
+ if (skb_headlen(skb) < PKT_PROT_LEN && skb_is_nonlinear(skb)) {
+ int target = min_t(int, skb->len, PKT_PROT_LEN);
+ __pskb_pull_tail(skb, target - skb_headlen(skb));
+ }
+
skb->dev = netif->dev;
skb->protocol = eth_type_trans(skb, skb->dev);
--
1.7.4
From 673c82b5110cfffafe1e1978bc07d6d10d111d50 Mon Sep 17 00:00:00 2001
From: Steven Smith <ssmith@xensource.com>
Date: Tue, 23 Feb 2010 11:49:26 +0000
Subject: [PATCH 090/197] xen: netback: try to pull a minimum of 72 bytes into the skb data area
when receiving a packet into netback.
The previous number, 64, tended to place a fragment boundary in the middle of
the TCP header options and led to unnecessary fragmentation in Windows <->
Windows networking.
Signed-off-by: Steven Smith <ssmith@xensource.com>
Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
---
drivers/xen/netback/netback.c | 9 +++------
1 files changed, 3 insertions(+), 6 deletions(-)
diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
index d4a7a56..44357d7 100644
--- a/drivers/xen/netback/netback.c
+++ b/drivers/xen/netback/netback.c
@@ -116,13 +116,10 @@ static inline int netif_page_index(struct page *pg)
/*
* This is the amount of packet we copy rather than map, so that the
* guest can't fiddle with the contents of the headers while we do
- * packet processing on them (netfilter, routing, etc). This could
- * probably do with being larger, since 1) 64-bytes isn't necessarily
- * long enough to cover a full christmas-tree ip+tcp header, let alone
- * packet contents, and 2) the data is probably in cache anyway
- * (though perhaps some other cpu's cache).
+ * packet processing on them (netfilter, routing, etc). 72 is enough
+ * to cover TCP+IP headers including options.
*/
-#define PKT_PROT_LEN 64
+#define PKT_PROT_LEN 72
static struct pending_tx_info {
struct xen_netif_tx_request req;
--
1.7.4
From c83bd213efd3ebf700189249c30d987b1cb14d7e Mon Sep 17 00:00:00 2001
From: Ian Campbell <ian.campbell@citrix.com>
Date: Tue, 23 Feb 2010 11:54:30 +0000
Subject: [PATCH 091/197] xen: netback: Allow setting of large MTU before rings have connected.
This allows a large MTU to be configured by the VIF hotplug
script. Previously this would fail because, at the point the hotplug
script runs, the VIF features have most likely not yet been negotiated
with the frontend and so SG has not yet been enabled. Invert this
behaviour so that SG is assumed present until negotiation proves
otherwise, and reduce the MTU at that point.
Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
---
drivers/xen/netback/interface.c | 6 +++++-
drivers/xen/netback/xenbus.c | 8 +++++---
2 files changed, 10 insertions(+), 4 deletions(-)
diff --git a/drivers/xen/netback/interface.c b/drivers/xen/netback/interface.c
index 21c1f95..b23b14d 100644
--- a/drivers/xen/netback/interface.c
+++ b/drivers/xen/netback/interface.c
@@ -104,6 +104,9 @@ static int netbk_set_sg(struct net_device *dev, u32 data)
return -ENOSYS;
}
+ if (dev->mtu > ETH_DATA_LEN)
+ dev->mtu = ETH_DATA_LEN;
+
return ethtool_op_set_sg(dev, data);
}
@@ -207,6 +210,7 @@ struct xen_netif *netif_alloc(struct device *parent, domid_t domid, unsigned int
memset(netif, 0, sizeof(*netif));
netif->domid = domid;
netif->handle = handle;
+ netif->features = NETIF_F_SG;
atomic_set(&netif->refcnt, 1);
init_waitqueue_head(&netif->waiting_to_free);
netif->dev = dev;
@@ -223,7 +227,7 @@ struct xen_netif *netif_alloc(struct device *parent, domid_t domid, unsigned int
init_timer(&netif->tx_queue_timeout);
dev->netdev_ops = &netback_ops;
- dev->features = NETIF_F_IP_CSUM;
+ dev->features = NETIF_F_IP_CSUM|NETIF_F_SG;
SET_ETHTOOL_OPS(dev, &network_ethtool_ops);
diff --git a/drivers/xen/netback/xenbus.c b/drivers/xen/netback/xenbus.c
index d2407cc..fcd3c34 100644
--- a/drivers/xen/netback/xenbus.c
+++ b/drivers/xen/netback/xenbus.c
@@ -445,9 +445,11 @@ static int connect_rings(struct backend_info *be)
if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-sg", "%d", &val) < 0)
val = 0;
- if (val) {
- be->netif->features |= NETIF_F_SG;
- be->netif->dev->features |= NETIF_F_SG;
+ if (!val) {
+ be->netif->features &= ~NETIF_F_SG;
+ be->netif->dev->features &= ~NETIF_F_SG;
+ if (be->netif->dev->mtu > ETH_DATA_LEN)
+ be->netif->dev->mtu = ETH_DATA_LEN;
}
if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-gso-tcpv4", "%d",
--
1.7.4
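(Aside, not part of the patch series: a small userspace C sketch of the policy patch 091 above introduces, assuming SG until the frontend negotiation says otherwise and only then clamping the MTU back to the standard Ethernet payload size. struct vif and frontend_negotiated() are illustrative names, not the driver's; ETH_DATA_LEN is the usual 1500-byte value.)

#include <assert.h>
#include <stdbool.h>

#define ETH_DATA_LEN 1500

struct vif { bool sg; int mtu; };

/* Called once feature negotiation with the frontend has completed. */
static void frontend_negotiated(struct vif *v, bool frontend_supports_sg)
{
	if (!frontend_supports_sg) {
		v->sg = false;
		if (v->mtu > ETH_DATA_LEN)
			v->mtu = ETH_DATA_LEN;	/* large MTU requires SG */
	}
}

int main(void)
{
	/* Hotplug script configured a jumbo MTU before the rings connected. */
	struct vif v = { .sg = true, .mtu = 9000 };

	frontend_negotiated(&v, false);		/* frontend lacks feature-sg */
	assert(!v.sg && v.mtu == ETH_DATA_LEN);
	return 0;
}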
From e5cd35b00cb63f3a3fa1651260a58d59bbc134b7 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Date: Fri, 19 Mar 2010 13:09:16 -0700
Subject: [PATCH 092/197] xen: netback: use get_sset_count rather than obsolete get_stats_count
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
drivers/xen/netback/interface.c | 11 ++++++++---
1 files changed, 8 insertions(+), 3 deletions(-)
diff --git a/drivers/xen/netback/interface.c b/drivers/xen/netback/interface.c
index b23b14d..086d939 100644
--- a/drivers/xen/netback/interface.c
+++ b/drivers/xen/netback/interface.c
@@ -136,9 +136,14 @@ static const struct netif_stat {
{ "copied_skbs", offsetof(struct xen_netif, nr_copied_skbs) },
};
-static int netbk_get_stats_count(struct net_device *dev)
+static int netbk_get_sset_count(struct net_device *dev, int string_set)
{
- return ARRAY_SIZE(netbk_stats);
+ switch (string_set) {
+ case ETH_SS_STATS:
+ return ARRAY_SIZE(netbk_stats);
+ default:
+ return -EINVAL;
+ }
}
static void netbk_get_ethtool_stats(struct net_device *dev,
@@ -176,7 +181,7 @@ static struct ethtool_ops network_ethtool_ops =
.set_tso = netbk_set_tso,
.get_link = ethtool_op_get_link,
- .get_stats_count = netbk_get_stats_count,
+ .get_sset_count = netbk_get_sset_count,
.get_ethtool_stats = netbk_get_ethtool_stats,
.get_strings = netbk_get_strings,
};
--
1.7.4
From 0c34835ee66ad641f01a8077a973b7ec1bfdcd86 Mon Sep 17 00:00:00 2001
From: Ian Campbell <ian.campbell@citrix.com>
Date: Tue, 11 May 2010 09:33:42 +0100
Subject: [PATCH 093/197] xen: netback: correctly set up skb->ip_summed on receive
In 2.6.18 CHECKSUM_PARTIAL and CHECKSUM_UNNECESSARY were both synonyms for
CHECKSUM_HW. This is no longer the case and we need to select the correct one.
data_validated csum_blank -> ip_summed
0 0 CHECKSUM_NONE
0 1 CHECKSUM_PARTIAL
1 0 CHECKSUM_UNNECESSARY
1 1 CHECKSUM_PARTIAL
Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
Tested-by: Matej Zary <zary@cvtisr.sk>
Tested-by: Michael D Labriola <mlabriol@gdeb.com>
---
drivers/xen/netback/netback.c | 10 +++-------
1 files changed, 3 insertions(+), 7 deletions(-)
diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
index 44357d7..725da0f 100644
--- a/drivers/xen/netback/netback.c
+++ b/drivers/xen/netback/netback.c
@@ -1320,14 +1320,10 @@ static void net_tx_submit(void)
netif_idx_release(pending_idx);
}
- /*
- * Old frontends do not assert data_validated but we
- * can infer it from csum_blank so test both flags.
- */
- if (txp->flags & (NETTXF_data_validated|NETTXF_csum_blank))
+ if (txp->flags & NETTXF_csum_blank)
skb->ip_summed = CHECKSUM_PARTIAL;
- else
- skb->ip_summed = CHECKSUM_NONE;
+ else if (txp->flags & NETTXF_data_validated)
+ skb->ip_summed = CHECKSUM_UNNECESSARY;
netbk_fill_frags(skb);
--
1.7.4
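(Aside, not part of the patch series: a userspace C sketch of the flag mapping tabulated in patch 093 above. The enum values below are stand-ins, not the kernel's CHECKSUM_* constants, and the function name is illustrative; the logic just restates the table.)

#include <assert.h>
#include <stdbool.h>

enum csum { CSUM_NONE, CSUM_PARTIAL, CSUM_UNNECESSARY };

static enum csum ip_summed_for(bool csum_blank, bool data_validated)
{
	if (csum_blank)		/* checksum still needs to be filled in */
		return CSUM_PARTIAL;
	if (data_validated)	/* sender already validated the data */
		return CSUM_UNNECESSARY;
	return CSUM_NONE;
}

int main(void)
{
	/* The four rows of the table in the commit message. */
	assert(ip_summed_for(false, false) == CSUM_NONE);
	assert(ip_summed_for(true,  false) == CSUM_PARTIAL);
	assert(ip_summed_for(false, true)  == CSUM_UNNECESSARY);
	assert(ip_summed_for(true,  true)  == CSUM_PARTIAL);
	return 0;
}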
From 094944631cc5a9d6e623302c987f78117c0bf7ac Mon Sep 17 00:00:00 2001
From: Dongxiao Xu <dongxiao.xu@intel.com>
Date: Wed, 19 May 2010 16:58:56 -0700
Subject: [PATCH 094/197] xen: netback: Move global/static variables into struct xen_netbk.
Bundle a lot of discrete variables into a single structure.
Signed-off-by: Dongxiao Xu <dongxiao.xu@intel.com>
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
drivers/xen/netback/common.h | 59 +++++++
drivers/xen/netback/netback.c | 360 ++++++++++++++++++++---------------------
2 files changed, 232 insertions(+), 187 deletions(-)
diff --git a/drivers/xen/netback/common.h b/drivers/xen/netback/common.h
index 1983768..00208f4 100644
--- a/drivers/xen/netback/common.h
+++ b/drivers/xen/netback/common.h
@@ -222,4 +222,63 @@ static inline int netbk_can_sg(struct net_device *dev)
return netif->features & NETIF_F_SG;
}
+struct pending_tx_info {
+ struct xen_netif_tx_request req;
+ struct xen_netif *netif;
+};
+typedef unsigned int pending_ring_idx_t;
+
+struct netbk_rx_meta {
+ skb_frag_t frag;
+ int id;
+};
+
+struct netbk_tx_pending_inuse {
+ struct list_head list;
+ unsigned long alloc_time;
+};
+
+#define MAX_PENDING_REQS 256
+
+struct xen_netbk {
+ struct tasklet_struct net_tx_tasklet;
+ struct tasklet_struct net_rx_tasklet;
+
+ struct sk_buff_head rx_queue;
+ struct sk_buff_head tx_queue;
+
+ struct timer_list net_timer;
+ struct timer_list netbk_tx_pending_timer;
+
+ struct page **mmap_pages;
+
+ pending_ring_idx_t pending_prod;
+ pending_ring_idx_t pending_cons;
+ pending_ring_idx_t dealloc_prod;
+ pending_ring_idx_t dealloc_cons;
+
+ struct list_head pending_inuse_head;
+ struct list_head net_schedule_list;
+
+ /* Protect the net_schedule_list in netif. */
+ spinlock_t net_schedule_list_lock;
+
+ struct pending_tx_info pending_tx_info[MAX_PENDING_REQS];
+ struct netbk_tx_pending_inuse pending_inuse[MAX_PENDING_REQS];
+ struct gnttab_unmap_grant_ref tx_unmap_ops[MAX_PENDING_REQS];
+ struct gnttab_map_grant_ref tx_map_ops[MAX_PENDING_REQS];
+
+ grant_handle_t grant_tx_handle[MAX_PENDING_REQS];
+ u16 pending_ring[MAX_PENDING_REQS];
+ u16 dealloc_ring[MAX_PENDING_REQS];
+
+ struct multicall_entry rx_mcl[NET_RX_RING_SIZE+3];
+ struct mmu_update rx_mmu[NET_RX_RING_SIZE];
+ struct gnttab_transfer grant_trans_op[NET_RX_RING_SIZE];
+ struct gnttab_copy grant_copy_op[NET_RX_RING_SIZE];
+ unsigned char rx_notify[NR_IRQS];
+ u16 notify_list[NET_RX_RING_SIZE];
+ struct netbk_rx_meta meta[NET_RX_RING_SIZE];
+};
+
#endif /* __NETIF__BACKEND__COMMON_H__ */
diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
index 725da0f..417f497 100644
--- a/drivers/xen/netback/netback.c
+++ b/drivers/xen/netback/netback.c
@@ -48,16 +48,7 @@
/*define NETBE_DEBUG_INTERRUPT*/
-struct netbk_rx_meta {
- skb_frag_t frag;
- int id;
-};
-
-struct netbk_tx_pending_inuse {
- struct list_head list;
- unsigned long alloc_time;
-};
-
+static struct xen_netbk *netbk;
static void netif_idx_release(u16 pending_idx);
static void make_tx_response(struct xen_netif *netif,
@@ -71,22 +62,12 @@ static struct xen_netif_rx_response *make_rx_response(struct xen_netif *netif,
u16 flags);
static void net_tx_action(unsigned long unused);
-static DECLARE_TASKLET(net_tx_tasklet, net_tx_action, 0);
static void net_rx_action(unsigned long unused);
-static DECLARE_TASKLET(net_rx_tasklet, net_rx_action, 0);
-
-static struct timer_list net_timer;
-static struct timer_list netbk_tx_pending_timer;
-#define MAX_PENDING_REQS 256
-
-static struct sk_buff_head rx_queue;
-
-static struct page **mmap_pages;
static inline unsigned long idx_to_pfn(unsigned int idx)
{
- return page_to_pfn(mmap_pages[idx]);
+ return page_to_pfn(netbk->mmap_pages[idx]);
}
static inline unsigned long idx_to_kaddr(unsigned int idx)
@@ -107,7 +88,7 @@ static inline int netif_page_index(struct page *pg)
if (!PageForeign(pg))
return -1;
- if ((idx >= MAX_PENDING_REQS) || (mmap_pages[idx] != pg))
+ if ((idx >= MAX_PENDING_REQS) || (netbk->mmap_pages[idx] != pg))
return -1;
return idx;
@@ -121,46 +102,17 @@ static inline int netif_page_index(struct page *pg)
*/
#define PKT_PROT_LEN 72
-static struct pending_tx_info {
- struct xen_netif_tx_request req;
- struct xen_netif *netif;
-} pending_tx_info[MAX_PENDING_REQS];
-static u16 pending_ring[MAX_PENDING_REQS];
-typedef unsigned int pending_ring_idx_t;
-
static inline pending_ring_idx_t pending_index(unsigned i)
{
return i & (MAX_PENDING_REQS-1);
}
-static pending_ring_idx_t pending_prod, pending_cons;
-
static inline pending_ring_idx_t nr_pending_reqs(void)
{
- return MAX_PENDING_REQS - pending_prod + pending_cons;
+ return MAX_PENDING_REQS -
+ netbk->pending_prod + netbk->pending_cons;
}
-/* Freed TX SKBs get batched on this ring before return to pending_ring. */
-static u16 dealloc_ring[MAX_PENDING_REQS];
-static pending_ring_idx_t dealloc_prod, dealloc_cons;
-
-/* Doubly-linked list of in-use pending entries. */
-static struct netbk_tx_pending_inuse pending_inuse[MAX_PENDING_REQS];
-static LIST_HEAD(pending_inuse_head);
-
-static struct sk_buff_head tx_queue;
-
-static grant_handle_t grant_tx_handle[MAX_PENDING_REQS];
-static struct gnttab_unmap_grant_ref tx_unmap_ops[MAX_PENDING_REQS];
-static struct gnttab_map_grant_ref tx_map_ops[MAX_PENDING_REQS];
-
-static LIST_HEAD(net_schedule_list);
-static DEFINE_SPINLOCK(net_schedule_list_lock);
-
-#define MAX_MFN_ALLOC 64
-static unsigned long mfn_list[MAX_MFN_ALLOC];
-static unsigned int alloc_index = 0;
-
/* Setting this allows the safe use of this driver without netloop. */
static int MODPARM_copy_skb = 1;
module_param_named(copy_skb, MODPARM_copy_skb, bool, 0);
@@ -168,18 +120,12 @@ MODULE_PARM_DESC(copy_skb, "Copy data received from netfront without netloop");
int netbk_copy_skb_mode;
-static inline unsigned long alloc_mfn(void)
-{
- BUG_ON(alloc_index == 0);
- return mfn_list[--alloc_index];
-}
-
static inline void maybe_schedule_tx_action(void)
{
smp_mb();
if ((nr_pending_reqs() < (MAX_PENDING_REQS/2)) &&
- !list_empty(&net_schedule_list))
- tasklet_schedule(&net_tx_tasklet);
+ !list_empty(&netbk->net_schedule_list))
+ tasklet_schedule(&netbk->net_tx_tasklet);
}
static struct sk_buff *netbk_copy_skb(struct sk_buff *skb)
@@ -328,9 +274,8 @@ int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev)
mod_timer(&netif->tx_queue_timeout, jiffies + HZ/2);
}
}
-
- skb_queue_tail(&rx_queue, skb);
- tasklet_schedule(&net_rx_tasklet);
+ skb_queue_tail(&netbk->rx_queue, skb);
+ tasklet_schedule(&netbk->net_rx_tasklet);
return 0;
@@ -372,7 +317,7 @@ static u16 netbk_gop_frag(struct xen_netif *netif, struct netbk_rx_meta *meta,
copy_gop = npo->copy + npo->copy_prod++;
copy_gop->flags = GNTCOPY_dest_gref;
if (idx > -1) {
- struct pending_tx_info *src_pend = &pending_tx_info[idx];
+ struct pending_tx_info *src_pend = &netbk->pending_tx_info[idx];
copy_gop->source.domid = src_pend->netif->domid;
copy_gop->source.u.ref = src_pend->req.gref;
copy_gop->flags |= GNTCOPY_source_gref;
@@ -487,30 +432,19 @@ static void net_rx_action(unsigned long unused)
int count;
unsigned long offset;
- /*
- * Putting hundreds of bytes on the stack is considered rude.
- * Static works because a tasklet can only be on one CPU at any time.
- */
- static struct multicall_entry rx_mcl[NET_RX_RING_SIZE+3];
- static struct mmu_update rx_mmu[NET_RX_RING_SIZE];
- static struct gnttab_transfer grant_trans_op[NET_RX_RING_SIZE];
- static struct gnttab_copy grant_copy_op[NET_RX_RING_SIZE];
- static unsigned char rx_notify[NR_IRQS];
- static u16 notify_list[NET_RX_RING_SIZE];
- static struct netbk_rx_meta meta[NET_RX_RING_SIZE];
-
struct netrx_pending_operations npo = {
- mmu: rx_mmu,
- trans: grant_trans_op,
- copy: grant_copy_op,
- mcl: rx_mcl,
- meta: meta};
+ .mmu = netbk->rx_mmu,
+ .trans = netbk->grant_trans_op,
+ .copy = netbk->grant_copy_op,
+ .mcl = netbk->rx_mcl,
+ .meta = netbk->meta,
+ };
skb_queue_head_init(&rxq);
count = 0;
- while ((skb = skb_dequeue(&rx_queue)) != NULL) {
+ while ((skb = skb_dequeue(&netbk->rx_queue)) != NULL) {
nr_frags = skb_shinfo(skb)->nr_frags;
*(int *)skb->cb = nr_frags;
@@ -525,39 +459,39 @@ static void net_rx_action(unsigned long unused)
break;
}
- BUG_ON(npo.meta_prod > ARRAY_SIZE(meta));
+ BUG_ON(npo.meta_prod > ARRAY_SIZE(netbk->meta));
npo.mmu_mcl = npo.mcl_prod;
if (npo.mcl_prod) {
BUG_ON(xen_feature(XENFEAT_auto_translated_physmap));
- BUG_ON(npo.mmu_prod > ARRAY_SIZE(rx_mmu));
+ BUG_ON(npo.mmu_prod > ARRAY_SIZE(netbk->rx_mmu));
mcl = npo.mcl + npo.mcl_prod++;
BUG_ON(mcl[-1].op != __HYPERVISOR_update_va_mapping);
mcl[-1].args[MULTI_UVMFLAGS_INDEX] = UVMF_TLB_FLUSH|UVMF_ALL;
mcl->op = __HYPERVISOR_mmu_update;
- mcl->args[0] = (unsigned long)rx_mmu;
+ mcl->args[0] = (unsigned long)netbk->rx_mmu;
mcl->args[1] = npo.mmu_prod;
mcl->args[2] = 0;
mcl->args[3] = DOMID_SELF;
}
if (npo.trans_prod) {
- BUG_ON(npo.trans_prod > ARRAY_SIZE(grant_trans_op));
+ BUG_ON(npo.trans_prod > ARRAY_SIZE(netbk->grant_trans_op));
mcl = npo.mcl + npo.mcl_prod++;
mcl->op = __HYPERVISOR_grant_table_op;
mcl->args[0] = GNTTABOP_transfer;
- mcl->args[1] = (unsigned long)grant_trans_op;
+ mcl->args[1] = (unsigned long)netbk->grant_trans_op;
mcl->args[2] = npo.trans_prod;
}
if (npo.copy_prod) {
- BUG_ON(npo.copy_prod > ARRAY_SIZE(grant_copy_op));
+ BUG_ON(npo.copy_prod > ARRAY_SIZE(netbk->grant_copy_op));
mcl = npo.mcl + npo.mcl_prod++;
mcl->op = __HYPERVISOR_grant_table_op;
mcl->args[0] = GNTTABOP_copy;
- mcl->args[1] = (unsigned long)grant_copy_op;
+ mcl->args[1] = (unsigned long)netbk->grant_copy_op;
mcl->args[2] = npo.copy_prod;
}
@@ -565,7 +499,7 @@ static void net_rx_action(unsigned long unused)
if (!npo.mcl_prod)
return;
- BUG_ON(npo.mcl_prod > ARRAY_SIZE(rx_mcl));
+ BUG_ON(npo.mcl_prod > ARRAY_SIZE(netbk->rx_mcl));
ret = HYPERVISOR_multicall(npo.mcl, npo.mcl_prod);
BUG_ON(ret != 0);
@@ -582,7 +516,7 @@ static void net_rx_action(unsigned long unused)
status = netbk_check_gop(nr_frags, netif->domid, &npo);
- id = meta[npo.meta_cons].id;
+ id = netbk->meta[npo.meta_cons].id;
flags = nr_frags ? NETRXF_more_data : 0;
if (skb->ip_summed == CHECKSUM_PARTIAL) /* local packet? */
@@ -595,7 +529,7 @@ static void net_rx_action(unsigned long unused)
resp = make_rx_response(netif, id, status, offset,
skb_headlen(skb), flags);
- if (meta[npo.meta_cons].frag.size) {
+ if (netbk->meta[npo.meta_cons].frag.size) {
struct xen_netif_extra_info *gso =
(struct xen_netif_extra_info *)
RING_GET_RESPONSE(&netif->rx,
@@ -603,7 +537,7 @@ static void net_rx_action(unsigned long unused)
resp->flags |= NETRXF_extra_info;
- gso->u.gso.size = meta[npo.meta_cons].frag.size;
+ gso->u.gso.size = netbk->meta[npo.meta_cons].frag.size;
gso->u.gso.type = XEN_NETIF_GSO_TYPE_TCPV4;
gso->u.gso.pad = 0;
gso->u.gso.features = 0;
@@ -613,14 +547,14 @@ static void net_rx_action(unsigned long unused)
}
netbk_add_frag_responses(netif, status,
- meta + npo.meta_cons + 1,
- nr_frags);
+ netbk->meta + npo.meta_cons + 1,
+ nr_frags);
RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&netif->rx, ret);
irq = netif->irq;
- if (ret && !rx_notify[irq]) {
- rx_notify[irq] = 1;
- notify_list[notify_nr++] = irq;
+ if (ret && !netbk->rx_notify[irq]) {
+ netbk->rx_notify[irq] = 1;
+ netbk->notify_list[notify_nr++] = irq;
}
if (netif_queue_stopped(netif->dev) &&
@@ -634,24 +568,25 @@ static void net_rx_action(unsigned long unused)
}
while (notify_nr != 0) {
- irq = notify_list[--notify_nr];
- rx_notify[irq] = 0;
+ irq = netbk->notify_list[--notify_nr];
+ netbk->rx_notify[irq] = 0;
notify_remote_via_irq(irq);
}
/* More work to do? */
- if (!skb_queue_empty(&rx_queue) && !timer_pending(&net_timer))
- tasklet_schedule(&net_rx_tasklet);
+ if (!skb_queue_empty(&netbk->rx_queue) &&
+ !timer_pending(&netbk->net_timer))
+ tasklet_schedule(&netbk->net_rx_tasklet);
}
static void net_alarm(unsigned long unused)
{
- tasklet_schedule(&net_rx_tasklet);
+ tasklet_schedule(&netbk->net_rx_tasklet);
}
static void netbk_tx_pending_timeout(unsigned long unused)
{
- tasklet_schedule(&net_tx_tasklet);
+ tasklet_schedule(&netbk->net_tx_tasklet);
}
struct net_device_stats *netif_be_get_stats(struct net_device *dev)
@@ -667,12 +602,12 @@ static int __on_net_schedule_list(struct xen_netif *netif)
static void remove_from_net_schedule_list(struct xen_netif *netif)
{
- spin_lock_irq(&net_schedule_list_lock);
+ spin_lock_irq(&netbk->net_schedule_list_lock);
if (likely(__on_net_schedule_list(netif))) {
list_del_init(&netif->list);
netif_put(netif);
}
- spin_unlock_irq(&net_schedule_list_lock);
+ spin_unlock_irq(&netbk->net_schedule_list_lock);
}
static void add_to_net_schedule_list_tail(struct xen_netif *netif)
@@ -680,13 +615,13 @@ static void add_to_net_schedule_list_tail(struct xen_netif *netif)
if (__on_net_schedule_list(netif))
return;
- spin_lock_irq(&net_schedule_list_lock);
+ spin_lock_irq(&netbk->net_schedule_list_lock);
if (!__on_net_schedule_list(netif) &&
likely(netif_schedulable(netif))) {
- list_add_tail(&netif->list, &net_schedule_list);
+ list_add_tail(&netif->list, &netbk->net_schedule_list);
netif_get(netif);
}
- spin_unlock_irq(&net_schedule_list_lock);
+ spin_unlock_irq(&netbk->net_schedule_list_lock);
}
void netif_schedule_work(struct xen_netif *netif)
@@ -736,8 +671,9 @@ static void tx_credit_callback(unsigned long data)
static inline int copy_pending_req(pending_ring_idx_t pending_idx)
{
- return gnttab_copy_grant_page(grant_tx_handle[pending_idx],
- &mmap_pages[pending_idx]);
+ return gnttab_copy_grant_page(
+ netbk->grant_tx_handle[pending_idx],
+ &netbk->mmap_pages[pending_idx]);
}
inline static void net_tx_action_dealloc(void)
@@ -750,22 +686,24 @@ inline static void net_tx_action_dealloc(void)
int ret;
LIST_HEAD(list);
- dc = dealloc_cons;
- gop = tx_unmap_ops;
+ dc = netbk->dealloc_cons;
+ gop = netbk->tx_unmap_ops;
/*
* Free up any grants we have finished using
*/
do {
- dp = dealloc_prod;
+ dp = netbk->dealloc_prod;
/* Ensure we see all indices enqueued by netif_idx_release(). */
smp_rmb();
while (dc != dp) {
unsigned long pfn;
+ struct netbk_tx_pending_inuse *pending_inuse =
+ netbk->pending_inuse;
- pending_idx = dealloc_ring[pending_index(dc++)];
+ pending_idx = netbk->dealloc_ring[pending_index(dc++)];
list_move_tail(&pending_inuse[pending_idx].list, &list);
pfn = idx_to_pfn(pending_idx);
@@ -773,22 +711,27 @@ inline static void net_tx_action_dealloc(void)
if (!phys_to_machine_mapping_valid(pfn))
continue;
- gnttab_set_unmap_op(gop, idx_to_kaddr(pending_idx),
- GNTMAP_host_map,
- grant_tx_handle[pending_idx]);
+ gnttab_set_unmap_op(gop,
+ idx_to_kaddr(pending_idx),
+ GNTMAP_host_map,
+ netbk->grant_tx_handle[pending_idx]);
gop++;
}
if (netbk_copy_skb_mode != NETBK_DELAYED_COPY_SKB ||
- list_empty(&pending_inuse_head))
+ list_empty(&netbk->pending_inuse_head))
break;
/* Copy any entries that have been pending for too long. */
- list_for_each_entry_safe(inuse, n, &pending_inuse_head, list) {
+ list_for_each_entry_safe(inuse, n,
+ &netbk->pending_inuse_head, list) {
+ struct pending_tx_info *pending_tx_info;
+ pending_tx_info = netbk->pending_tx_info;
+
if (time_after(inuse->alloc_time + HZ / 2, jiffies))
break;
- pending_idx = inuse - pending_inuse;
+ pending_idx = inuse - netbk->pending_inuse;
pending_tx_info[pending_idx].netif->nr_copied_skbs++;
@@ -805,16 +748,21 @@ inline static void net_tx_action_dealloc(void)
break;
}
- } while (dp != dealloc_prod);
+ } while (dp != netbk->dealloc_prod);
- dealloc_cons = dc;
+ netbk->dealloc_cons = dc;
ret = HYPERVISOR_grant_table_op(
- GNTTABOP_unmap_grant_ref, tx_unmap_ops, gop - tx_unmap_ops);
+ GNTTABOP_unmap_grant_ref, netbk->tx_unmap_ops,
+ gop - netbk->tx_unmap_ops);
BUG_ON(ret);
list_for_each_entry_safe(inuse, n, &list, list) {
- pending_idx = inuse - pending_inuse;
+ struct pending_tx_info *pending_tx_info;
+ pending_ring_idx_t index;
+
+ pending_tx_info = netbk->pending_tx_info;
+ pending_idx = inuse - netbk->pending_inuse;
netif = pending_tx_info[pending_idx].netif;
@@ -822,9 +770,10 @@ inline static void net_tx_action_dealloc(void)
NETIF_RSP_OKAY);
/* Ready for next use. */
- gnttab_reset_grant_page(mmap_pages[pending_idx]);
+ gnttab_reset_grant_page(netbk->mmap_pages[pending_idx]);
- pending_ring[pending_index(pending_prod++)] = pending_idx;
+ index = pending_index(netbk->pending_prod++);
+ netbk->pending_ring[index] = pending_idx;
netif_put(netif);
@@ -832,7 +781,8 @@ inline static void net_tx_action_dealloc(void)
}
}
-static void netbk_tx_err(struct xen_netif *netif, struct xen_netif_tx_request *txp, RING_IDX end)
+static void netbk_tx_err(struct xen_netif *netif,
+ struct xen_netif_tx_request *txp, RING_IDX end)
{
RING_IDX cons = netif->tx.req_cons;
@@ -902,7 +852,12 @@ static struct gnttab_map_grant_ref *netbk_get_requests(struct xen_netif *netif,
start = ((unsigned long)shinfo->frags[0].page == pending_idx);
for (i = start; i < shinfo->nr_frags; i++, txp++) {
- pending_idx = pending_ring[pending_index(pending_cons++)];
+ pending_ring_idx_t index;
+ struct pending_tx_info *pending_tx_info =
+ netbk->pending_tx_info;
+
+ index = pending_index(netbk->pending_cons++);
+ pending_idx = netbk->pending_ring[index];
gnttab_set_map_op(mop++, idx_to_kaddr(pending_idx),
GNTMAP_host_map | GNTMAP_readonly,
@@ -922,6 +877,7 @@ static int netbk_tx_check_mop(struct sk_buff *skb,
{
struct gnttab_map_grant_ref *mop = *mopp;
int pending_idx = *((u16 *)skb->data);
+ struct pending_tx_info *pending_tx_info = netbk->pending_tx_info;
struct xen_netif *netif = pending_tx_info[pending_idx].netif;
struct xen_netif_tx_request *txp;
struct skb_shared_info *shinfo = skb_shinfo(skb);
@@ -931,15 +887,17 @@ static int netbk_tx_check_mop(struct sk_buff *skb,
/* Check status of header. */
err = mop->status;
if (unlikely(err)) {
+ pending_ring_idx_t index;
+ index = pending_index(netbk->pending_prod++);
txp = &pending_tx_info[pending_idx].req;
make_tx_response(netif, txp, NETIF_RSP_ERROR);
- pending_ring[pending_index(pending_prod++)] = pending_idx;
+ netbk->pending_ring[index] = pending_idx;
netif_put(netif);
} else {
set_phys_to_machine(
__pa(idx_to_kaddr(pending_idx)) >> PAGE_SHIFT,
FOREIGN_FRAME(mop->dev_bus_addr >> PAGE_SHIFT));
- grant_tx_handle[pending_idx] = mop->handle;
+ netbk->grant_tx_handle[pending_idx] = mop->handle;
}
/* Skip first skb fragment if it is on same page as header fragment. */
@@ -947,16 +905,19 @@ static int netbk_tx_check_mop(struct sk_buff *skb,
for (i = start; i < nr_frags; i++) {
int j, newerr;
+ pending_ring_idx_t index;
pending_idx = (unsigned long)shinfo->frags[i].page;
/* Check error status: if okay then remember grant handle. */
newerr = (++mop)->status;
if (likely(!newerr)) {
+ unsigned long addr;
+ addr = idx_to_kaddr(pending_idx);
set_phys_to_machine(
- __pa(idx_to_kaddr(pending_idx))>>PAGE_SHIFT,
+ __pa(addr)>>PAGE_SHIFT,
FOREIGN_FRAME(mop->dev_bus_addr>>PAGE_SHIFT));
- grant_tx_handle[pending_idx] = mop->handle;
+ netbk->grant_tx_handle[pending_idx] = mop->handle;
/* Had a previous error? Invalidate this fragment. */
if (unlikely(err))
netif_idx_release(pending_idx);
@@ -964,9 +925,10 @@ static int netbk_tx_check_mop(struct sk_buff *skb,
}
/* Error on this fragment: respond to client with an error. */
- txp = &pending_tx_info[pending_idx].req;
+ txp = &netbk->pending_tx_info[pending_idx].req;
make_tx_response(netif, txp, NETIF_RSP_ERROR);
- pending_ring[pending_index(pending_prod++)] = pending_idx;
+ index = pending_index(netbk->pending_prod++);
+ netbk->pending_ring[index] = pending_idx;
netif_put(netif);
/* Not the first error? Preceding frags already invalidated. */
@@ -1002,11 +964,11 @@ static void netbk_fill_frags(struct sk_buff *skb)
pending_idx = (unsigned long)frag->page;
- pending_inuse[pending_idx].alloc_time = jiffies;
- list_add_tail(&pending_inuse[pending_idx].list,
- &pending_inuse_head);
+ netbk->pending_inuse[pending_idx].alloc_time = jiffies;
+ list_add_tail(&netbk->pending_inuse[pending_idx].list,
+ &netbk->pending_inuse_head);
- txp = &pending_tx_info[pending_idx].req;
+ txp = &netbk->pending_tx_info[pending_idx].req;
frag->page = virt_to_page(idx_to_kaddr(pending_idx));
frag->size = txp->size;
frag->page_offset = txp->offset;
@@ -1145,9 +1107,9 @@ static unsigned net_tx_build_mops(void)
struct sk_buff *skb;
int ret;
- mop = tx_map_ops;
+ mop = netbk->tx_map_ops;
while (((nr_pending_reqs() + MAX_SKB_FRAGS) < MAX_PENDING_REQS) &&
- !list_empty(&net_schedule_list)) {
+ !list_empty(&netbk->net_schedule_list)) {
struct xen_netif *netif;
struct xen_netif_tx_request txreq;
struct xen_netif_tx_request txfrags[MAX_SKB_FRAGS];
@@ -1156,9 +1118,11 @@ static unsigned net_tx_build_mops(void)
RING_IDX idx;
int work_to_do;
unsigned int data_len;
+ pending_ring_idx_t index;
/* Get a netif from the list with work to do. */
- netif = list_first_entry(&net_schedule_list, struct xen_netif, list);
+ netif = list_first_entry(&netbk->net_schedule_list,
+ struct xen_netif, list);
netif_get(netif);
remove_from_net_schedule_list(netif);
@@ -1217,7 +1181,8 @@ static unsigned net_tx_build_mops(void)
continue;
}
- pending_idx = pending_ring[pending_index(pending_cons)];
+ index = pending_index(netbk->pending_cons);
+ pending_idx = netbk->pending_ring[index];
data_len = (txreq.size > PKT_PROT_LEN &&
ret < MAX_SKB_FRAGS) ?
@@ -1250,9 +1215,9 @@ static unsigned net_tx_build_mops(void)
txreq.gref, netif->domid);
mop++;
- memcpy(&pending_tx_info[pending_idx].req,
+ memcpy(&netbk->pending_tx_info[pending_idx].req,
&txreq, sizeof(txreq));
- pending_tx_info[pending_idx].netif = netif;
+ netbk->pending_tx_info[pending_idx].netif = netif;
*((u16 *)skb->data) = pending_idx;
__skb_put(skb, data_len);
@@ -1267,20 +1232,20 @@ static unsigned net_tx_build_mops(void)
skb_shinfo(skb)->frags[0].page = (void *)~0UL;
}
- __skb_queue_tail(&tx_queue, skb);
+ __skb_queue_tail(&netbk->tx_queue, skb);
- pending_cons++;
+ netbk->pending_cons++;
mop = netbk_get_requests(netif, skb, txfrags, mop);
netif->tx.req_cons = idx;
netif_schedule_work(netif);
- if ((mop - tx_map_ops) >= ARRAY_SIZE(tx_map_ops))
+ if ((mop - netbk->tx_map_ops) >= ARRAY_SIZE(netbk->tx_map_ops))
break;
}
- return mop - tx_map_ops;
+ return mop - netbk->tx_map_ops;
}
static void net_tx_submit(void)
@@ -1288,16 +1253,16 @@ static void net_tx_submit(void)
struct gnttab_map_grant_ref *mop;
struct sk_buff *skb;
- mop = tx_map_ops;
- while ((skb = __skb_dequeue(&tx_queue)) != NULL) {
+ mop = netbk->tx_map_ops;
+ while ((skb = __skb_dequeue(&netbk->tx_queue)) != NULL) {
struct xen_netif_tx_request *txp;
struct xen_netif *netif;
u16 pending_idx;
unsigned data_len;
pending_idx = *((u16 *)skb->data);
- netif = pending_tx_info[pending_idx].netif;
- txp = &pending_tx_info[pending_idx].req;
+ netif = netbk->pending_tx_info[pending_idx].netif;
+ txp = &netbk->pending_tx_info[pending_idx].req;
/* Check the remap error code. */
if (unlikely(netbk_tx_check_mop(skb, &mop))) {
@@ -1363,12 +1328,13 @@ static void net_tx_submit(void)
}
if (netbk_copy_skb_mode == NETBK_DELAYED_COPY_SKB &&
- !list_empty(&pending_inuse_head)) {
+ !list_empty(&netbk->pending_inuse_head)) {
struct netbk_tx_pending_inuse *oldest;
- oldest = list_entry(pending_inuse_head.next,
+ oldest = list_entry(netbk->pending_inuse_head.next,
struct netbk_tx_pending_inuse, list);
- mod_timer(&netbk_tx_pending_timer, oldest->alloc_time + HZ);
+ mod_timer(&netbk->netbk_tx_pending_timer,
+ oldest->alloc_time + HZ);
}
}
@@ -1378,7 +1344,7 @@ static void net_tx_action(unsigned long unused)
unsigned nr_mops;
int ret;
- if (dealloc_cons != dealloc_prod)
+ if (netbk->dealloc_cons != netbk->dealloc_prod)
net_tx_action_dealloc();
nr_mops = net_tx_build_mops();
@@ -1387,7 +1353,7 @@ static void net_tx_action(unsigned long unused)
return;
ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref,
- tx_map_ops, nr_mops);
+ netbk->tx_map_ops, nr_mops);
BUG_ON(ret);
net_tx_submit();
@@ -1397,15 +1363,17 @@ static void netif_idx_release(u16 pending_idx)
{
static DEFINE_SPINLOCK(_lock);
unsigned long flags;
+ pending_ring_idx_t index;
spin_lock_irqsave(&_lock, flags);
- dealloc_ring[pending_index(dealloc_prod)] = pending_idx;
+ index = pending_index(netbk->dealloc_prod);
+ netbk->dealloc_ring[index] = pending_idx;
/* Sync with net_tx_action_dealloc: insert idx /then/ incr producer. */
smp_wmb();
- dealloc_prod++;
+ netbk->dealloc_prod++;
spin_unlock_irqrestore(&_lock, flags);
- tasklet_schedule(&net_tx_tasklet);
+ tasklet_schedule(&netbk->net_tx_tasklet);
}
static void netif_page_release(struct page *page, unsigned int order)
@@ -1481,9 +1449,9 @@ static irqreturn_t netif_be_dbg(int irq, void *dev_id, struct pt_regs *regs)
int i = 0;
printk(KERN_ALERT "netif_schedule_list:\n");
- spin_lock_irq(&net_schedule_list_lock);
+ spin_lock_irq(&netbk->net_schedule_list_lock);
- list_for_each (ent, &net_schedule_list) {
+ list_for_each(ent, &netbk->net_schedule_list) {
netif = list_entry(ent, struct xen_netif, list);
printk(KERN_ALERT " %d: private(rx_req_cons=%08x "
"rx_resp_prod=%08x\n",
@@ -1500,7 +1468,7 @@ static irqreturn_t netif_be_dbg(int irq, void *dev_id, struct pt_regs *regs)
i++;
}
- spin_unlock_irq(&net_schedule_list_lock);
+ spin_unlock_irq(&netbk->net_schedule_list_lock);
printk(KERN_ALERT " ** End of netif_schedule_list **\n");
return IRQ_HANDLED;
@@ -1516,37 +1484,53 @@ static int __init netback_init(void)
if (!xen_domain())
return -ENODEV;
+ netbk = (struct xen_netbk *)vmalloc(sizeof(struct xen_netbk));
+ if (!netbk) {
+ printk(KERN_ALERT "%s: out of memory\n", __func__);
+ return -ENOMEM;
+ }
+
/* We can increase reservation by this much in net_rx_action(). */
// balloon_update_driver_allowance(NET_RX_RING_SIZE);
- skb_queue_head_init(&rx_queue);
- skb_queue_head_init(&tx_queue);
+ skb_queue_head_init(&netbk->rx_queue);
+ skb_queue_head_init(&netbk->tx_queue);
- init_timer(&net_timer);
- net_timer.data = 0;
- net_timer.function = net_alarm;
+ init_timer(&netbk->net_timer);
+ netbk->net_timer.data = 0;
+ netbk->net_timer.function = net_alarm;
- init_timer(&netbk_tx_pending_timer);
- netbk_tx_pending_timer.data = 0;
- netbk_tx_pending_timer.function = netbk_tx_pending_timeout;
+ init_timer(&netbk->netbk_tx_pending_timer);
+ netbk->netbk_tx_pending_timer.data = 0;
+ netbk->netbk_tx_pending_timer.function = netbk_tx_pending_timeout;
- mmap_pages = alloc_empty_pages_and_pagevec(MAX_PENDING_REQS);
- if (mmap_pages == NULL) {
- printk("%s: out of memory\n", __FUNCTION__);
- return -ENOMEM;
+ netbk->mmap_pages =
+ alloc_empty_pages_and_pagevec(MAX_PENDING_REQS);
+ if (!netbk->mmap_pages) {
+ printk(KERN_ALERT "%s: out of memory\n", __func__);
+ rc = -ENOMEM;
+ goto failed_init2;
}
for (i = 0; i < MAX_PENDING_REQS; i++) {
- page = mmap_pages[i];
+ page = netbk->mmap_pages[i];
SetPageForeign(page, netif_page_release);
netif_set_page_index(page, i);
- INIT_LIST_HEAD(&pending_inuse[i].list);
+ INIT_LIST_HEAD(&netbk->pending_inuse[i].list);
}
- pending_cons = 0;
- pending_prod = MAX_PENDING_REQS;
+ netbk->pending_cons = 0;
+ netbk->pending_prod = MAX_PENDING_REQS;
for (i = 0; i < MAX_PENDING_REQS; i++)
- pending_ring[i] = i;
+ netbk->pending_ring[i] = i;
+
+ tasklet_init(&netbk->net_tx_tasklet, net_tx_action, 0);
+ tasklet_init(&netbk->net_rx_tasklet, net_rx_action, 0);
+
+ INIT_LIST_HEAD(&netbk->pending_inuse_head);
+ INIT_LIST_HEAD(&netbk->net_schedule_list);
+
+ spin_lock_init(&netbk->net_schedule_list_lock);
netbk_copy_skb_mode = NETBK_DONT_COPY_SKB;
if (MODPARM_copy_skb) {
@@ -1561,7 +1545,7 @@ static int __init netback_init(void)
rc = netif_xenbus_init();
if (rc)
- goto failed_init;
+ goto failed_init1;
#ifdef NETBE_DEBUG_INTERRUPT
(void)bind_virq_to_irqhandler(VIRQ_DEBUG,
@@ -1574,10 +1558,12 @@ static int __init netback_init(void)
return 0;
-failed_init:
- free_empty_pages_and_pagevec(mmap_pages, MAX_PENDING_REQS);
- del_timer(&netbk_tx_pending_timer);
- del_timer(&net_timer);
+failed_init1:
+ free_empty_pages_and_pagevec(netbk->mmap_pages, MAX_PENDING_REQS);
+failed_init2:
+ del_timer(&netbk->netbk_tx_pending_timer);
+ del_timer(&netbk->net_timer);
+ vfree(netbk);
return rc;
}
--
1.7.4
From c099c22d8b1c12fc7d68998982eb4ccd4918e813 Mon Sep 17 00:00:00 2001
From: Dongxiao Xu <dongxiao.xu@intel.com>
Date: Wed, 19 May 2010 16:58:57 -0700
Subject: [PATCH 095/197] xen: netback: Introduce a new struct type page_ext.
struct page_ext is used to store the group and idx information by
which a specific page can be identified.
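Not part of the patch, but to make the encoding concrete: the userspace-only
sketch below packs a group and an index into one pointer-sized field the same
way the new union page_ext does, with the group stored biased by one
(presumably so a cleared mapping field does not decode to a valid group). A
64-bit long is assumed; on 32-bit the patch squeezes both values into
bitfields instead.

    /*
     * Userspace sketch only -- not part of the patch. Mirrors the
     * union page_ext encoding below, assuming a 64-bit long so two
     * full unsigned ints fit in the pointer-sized mapping field.
     */
    #include <stdio.h>

    union page_ext {
    	struct {
    		unsigned int group, idx;
    	} e;
    	void *mapping;
    };

    static void *encode(unsigned int group, unsigned int idx)
    {
    	/* group is stored biased by 1, as in netif_set_page_ext() */
    	union page_ext ext = { .e = { .group = group + 1, .idx = idx } };

    	return ext.mapping;
    }

    static void decode(void *mapping, unsigned int *group, unsigned int *idx)
    {
    	union page_ext ext = { .mapping = mapping };

    	*group = ext.e.group - 1;
    	*idx = ext.e.idx;
    }

    int main(void)
    {
    	unsigned int group, idx;

    	decode(encode(3, 42), &group, &idx);
    	printf("group=%u idx=%u\n", group, idx);
    	return 0;
    }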
Signed-off-by: Dongxiao Xu <dongxiao.xu@intel.com>
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
drivers/xen/netback/common.h | 15 +++++++++++++++
drivers/xen/netback/netback.c | 28 +++++++++++++++++-----------
2 files changed, 32 insertions(+), 11 deletions(-)
diff --git a/drivers/xen/netback/common.h b/drivers/xen/netback/common.h
index 00208f4..5e0e467 100644
--- a/drivers/xen/netback/common.h
+++ b/drivers/xen/netback/common.h
@@ -240,6 +240,21 @@ struct netbk_tx_pending_inuse {
#define MAX_PENDING_REQS 256
+/* extra field used in struct page */
+union page_ext {
+ struct {
+#if BITS_PER_LONG < 64
+#define IDX_WIDTH 8
+#define GROUP_WIDTH (BITS_PER_LONG - IDX_WIDTH)
+ unsigned int group:GROUP_WIDTH;
+ unsigned int idx:IDX_WIDTH;
+#else
+ unsigned int group, idx;
+#endif
+ } e;
+ void *mapping;
+};
+
struct xen_netbk {
struct tasklet_struct net_tx_tasklet;
struct tasklet_struct net_rx_tasklet;
diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
index 417f497..71ec999 100644
--- a/drivers/xen/netback/netback.c
+++ b/drivers/xen/netback/netback.c
@@ -76,22 +76,27 @@ static inline unsigned long idx_to_kaddr(unsigned int idx)
}
/* extra field used in struct page */
-static inline void netif_set_page_index(struct page *pg, unsigned int index)
+static inline void netif_set_page_ext(struct page *pg, unsigned int group,
+ unsigned int idx)
{
- *(unsigned long *)&pg->mapping = index + 1;
+ union page_ext ext = { .e = { .group = group + 1, .idx = idx } };
+
+ BUILD_BUG_ON(sizeof(ext) > sizeof(ext.mapping));
+ pg->mapping = ext.mapping;
}
-static inline int netif_page_index(struct page *pg)
+static inline unsigned int netif_page_group(const struct page *pg)
{
- unsigned long idx = (unsigned long)pg->mapping - 1;
+ union page_ext ext = { .mapping = pg->mapping };
- if (!PageForeign(pg))
- return -1;
+ return ext.e.group - 1;
+}
- if ((idx >= MAX_PENDING_REQS) || (netbk->mmap_pages[idx] != pg))
- return -1;
+static inline unsigned int netif_page_index(const struct page *pg)
+{
+ union page_ext ext = { .mapping = pg->mapping };
- return idx;
+ return ext.e.idx;
}
/*
@@ -1380,7 +1385,8 @@ static void netif_page_release(struct page *page, unsigned int order)
{
int idx = netif_page_index(page);
BUG_ON(order);
- BUG_ON(idx < 0);
+ BUG_ON(idx < 0 || idx >= MAX_PENDING_REQS);
+ BUG_ON(netbk->mmap_pages[idx] != page);
netif_idx_release(idx);
}
@@ -1515,7 +1521,7 @@ static int __init netback_init(void)
for (i = 0; i < MAX_PENDING_REQS; i++) {
page = netbk->mmap_pages[i];
SetPageForeign(page, netif_page_release);
- netif_set_page_index(page, i);
+ netif_set_page_ext(page, 0, i);
INIT_LIST_HEAD(&netbk->pending_inuse[i].list);
}
--
1.7.4
From 9534985c5b9cc3f6238d6cb8bba7d376e82039d3 Mon Sep 17 00:00:00 2001
From: Dongxiao Xu <dongxiao.xu@intel.com>
Date: Wed, 19 May 2010 17:08:21 -0700
Subject: [PATCH 096/197] xen: netback: Multiple tasklets support.
Netback currently uses a single pair of tasklets for Tx/Rx data
transfer. A tasklet can only run on one CPU at a time, and this one
pair serves all the netfronts, so it has become a performance
bottleneck. This patch replaces the single pair in dom0 with multiple
tasklet pairs.
Assuming Dom0 has CPUNR VCPUs, we define CPUNR tasklet pairs (CPUNR
for Tx and CPUNR for Rx). Each pair of tasklets serves a specific
group of netfronts. The formerly global and static variables are also
duplicated per group, so the groups do not have to contend on a
shared spinlock.
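For illustration only (not part of the patch), this self-contained userspace
sketch shows the group-selection rule the patch introduces: a new netfront is
attached to whichever group currently serves the fewest netfronts, mirroring
the netbk_add_netif() logic added to interface.c below.

    /* Userspace sketch only: choose the least-loaded netback group. */
    #include <stdio.h>

    static int pick_group(const int *netfront_count, int group_nr)
    {
    	int i;
    	int min_group = 0;
    	int min_count = netfront_count[0];

    	for (i = 0; i < group_nr; i++) {
    		if (netfront_count[i] < min_count) {
    			min_group = i;
    			min_count = netfront_count[i];
    		}
    	}
    	return min_group;
    }

    int main(void)
    {
    	/* e.g. four groups already serving 2, 0, 1 and 3 netfronts */
    	int counts[] = { 2, 0, 1, 3 };

    	printf("new netfront -> group %d\n", pick_group(counts, 4));
    	return 0;
    }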
Signed-off-by: Dongxiao Xu <dongxiao.xu@intel.com>
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
drivers/xen/netback/common.h | 6 +
drivers/xen/netback/interface.c | 27 ++++
drivers/xen/netback/netback.c | 270 ++++++++++++++++++++++++---------------
3 files changed, 197 insertions(+), 106 deletions(-)
diff --git a/drivers/xen/netback/common.h b/drivers/xen/netback/common.h
index 5e0e467..847ba58 100644
--- a/drivers/xen/netback/common.h
+++ b/drivers/xen/netback/common.h
@@ -58,6 +58,7 @@
struct xen_netif {
/* Unique identifier for this interface. */
domid_t domid;
+ int group;
unsigned int handle;
u8 fe_dev_addr[6];
@@ -278,6 +279,8 @@ struct xen_netbk {
/* Protect the net_schedule_list in netif. */
spinlock_t net_schedule_list_lock;
+ atomic_t netfront_count;
+
struct pending_tx_info pending_tx_info[MAX_PENDING_REQS];
struct netbk_tx_pending_inuse pending_inuse[MAX_PENDING_REQS];
struct gnttab_unmap_grant_ref tx_unmap_ops[MAX_PENDING_REQS];
@@ -296,4 +299,7 @@ struct xen_netbk {
struct netbk_rx_meta meta[NET_RX_RING_SIZE];
};
+extern struct xen_netbk *xen_netbk;
+extern int xen_netbk_group_nr;
+
#endif /* __NETIF__BACKEND__COMMON_H__ */
diff --git a/drivers/xen/netback/interface.c b/drivers/xen/netback/interface.c
index 086d939..172ef4c 100644
--- a/drivers/xen/netback/interface.c
+++ b/drivers/xen/netback/interface.c
@@ -54,8 +54,33 @@
static unsigned long netbk_queue_length = 32;
module_param_named(queue_length, netbk_queue_length, ulong, 0644);
+static void netbk_add_netif(struct xen_netbk *netbk, int group_nr,
+ struct xen_netif *netif)
+{
+ int i;
+ int min_netfront_count;
+ int min_group = 0;
+ min_netfront_count = atomic_read(&netbk[0].netfront_count);
+ for (i = 0; i < group_nr; i++) {
+ int netfront_count = atomic_read(&netbk[i].netfront_count);
+ if (netfront_count < min_netfront_count) {
+ min_group = i;
+ min_netfront_count = netfront_count;
+ }
+ }
+
+ netif->group = min_group;
+ atomic_inc(&netbk[netif->group].netfront_count);
+}
+
+static void netbk_remove_netif(struct xen_netbk *netbk, struct xen_netif *netif)
+{
+ atomic_dec(&netbk[netif->group].netfront_count);
+}
+
static void __netif_up(struct xen_netif *netif)
{
+ netbk_add_netif(xen_netbk, xen_netbk_group_nr, netif);
enable_irq(netif->irq);
netif_schedule_work(netif);
}
@@ -64,6 +89,7 @@ static void __netif_down(struct xen_netif *netif)
{
disable_irq(netif->irq);
netif_deschedule_work(netif);
+ netbk_remove_netif(xen_netbk, netif);
}
static int net_open(struct net_device *dev)
@@ -214,6 +240,7 @@ struct xen_netif *netif_alloc(struct device *parent, domid_t domid, unsigned int
netif = netdev_priv(dev);
memset(netif, 0, sizeof(*netif));
netif->domid = domid;
+ netif->group = -1;
netif->handle = handle;
netif->features = NETIF_F_SG;
atomic_set(&netif->refcnt, 1);
diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
index 71ec999..feefb14 100644
--- a/drivers/xen/netback/netback.c
+++ b/drivers/xen/netback/netback.c
@@ -48,9 +48,10 @@
/*define NETBE_DEBUG_INTERRUPT*/
-static struct xen_netbk *netbk;
+struct xen_netbk *xen_netbk;
+int xen_netbk_group_nr;
-static void netif_idx_release(u16 pending_idx);
+static void netif_idx_release(struct xen_netbk *netbk, u16 pending_idx);
static void make_tx_response(struct xen_netif *netif,
struct xen_netif_tx_request *txp,
s8 st);
@@ -61,18 +62,20 @@ static struct xen_netif_rx_response *make_rx_response(struct xen_netif *netif,
u16 size,
u16 flags);
-static void net_tx_action(unsigned long unused);
+static void net_tx_action(unsigned long data);
-static void net_rx_action(unsigned long unused);
+static void net_rx_action(unsigned long data);
-static inline unsigned long idx_to_pfn(unsigned int idx)
+static inline unsigned long idx_to_pfn(struct xen_netbk *netbk,
+ unsigned int idx)
{
return page_to_pfn(netbk->mmap_pages[idx]);
}
-static inline unsigned long idx_to_kaddr(unsigned int idx)
+static inline unsigned long idx_to_kaddr(struct xen_netbk *netbk,
+ unsigned int idx)
{
- return (unsigned long)pfn_to_kaddr(idx_to_pfn(idx));
+ return (unsigned long)pfn_to_kaddr(idx_to_pfn(netbk, idx));
}
/* extra field used in struct page */
@@ -112,7 +115,7 @@ static inline pending_ring_idx_t pending_index(unsigned i)
return i & (MAX_PENDING_REQS-1);
}
-static inline pending_ring_idx_t nr_pending_reqs(void)
+static inline pending_ring_idx_t nr_pending_reqs(struct xen_netbk *netbk)
{
return MAX_PENDING_REQS -
netbk->pending_prod + netbk->pending_cons;
@@ -125,10 +128,10 @@ MODULE_PARM_DESC(copy_skb, "Copy data received from netfront without netloop");
int netbk_copy_skb_mode;
-static inline void maybe_schedule_tx_action(void)
+static inline void maybe_schedule_tx_action(struct xen_netbk *netbk)
{
smp_mb();
- if ((nr_pending_reqs() < (MAX_PENDING_REQS/2)) &&
+ if ((nr_pending_reqs(netbk) < (MAX_PENDING_REQS/2)) &&
!list_empty(&netbk->net_schedule_list))
tasklet_schedule(&netbk->net_tx_tasklet);
}
@@ -235,9 +238,15 @@ static void tx_queue_callback(unsigned long data)
int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev)
{
struct xen_netif *netif = netdev_priv(dev);
+ struct xen_netbk *netbk;
BUG_ON(skb->dev != dev);
+ if (netif->group == -1)
+ goto drop;
+
+ netbk = &xen_netbk[netif->group];
+
/* Drop the packet if the target domain has no receive buffers. */
if (unlikely(!netif_schedulable(netif) || netbk_queue_full(netif)))
goto drop;
@@ -313,6 +322,7 @@ static u16 netbk_gop_frag(struct xen_netif *netif, struct netbk_rx_meta *meta,
struct gnttab_copy *copy_gop;
struct xen_netif_rx_request *req;
unsigned long old_mfn;
+ int group = netif_page_group(page);
int idx = netif_page_index(page);
old_mfn = virt_to_mfn(page_address(page));
@@ -321,7 +331,8 @@ static u16 netbk_gop_frag(struct xen_netif *netif, struct netbk_rx_meta *meta,
copy_gop = npo->copy + npo->copy_prod++;
copy_gop->flags = GNTCOPY_dest_gref;
- if (idx > -1) {
+ if (PageForeign(page)) {
+ struct xen_netbk *netbk = &xen_netbk[group];
struct pending_tx_info *src_pend = &netbk->pending_tx_info[idx];
copy_gop->source.domid = src_pend->netif->domid;
copy_gop->source.u.ref = src_pend->req.gref;
@@ -422,9 +433,10 @@ static void netbk_add_frag_responses(struct xen_netif *netif, int status,
}
}
-static void net_rx_action(unsigned long unused)
+static void net_rx_action(unsigned long data)
{
struct xen_netif *netif = NULL;
+ struct xen_netbk *netbk = (struct xen_netbk *)data;
s8 status;
u16 id, irq, flags;
struct xen_netif_rx_response *resp;
@@ -584,13 +596,15 @@ static void net_rx_action(unsigned long unused)
tasklet_schedule(&netbk->net_rx_tasklet);
}
-static void net_alarm(unsigned long unused)
+static void net_alarm(unsigned long data)
{
+ struct xen_netbk *netbk = (struct xen_netbk *)data;
tasklet_schedule(&netbk->net_rx_tasklet);
}
-static void netbk_tx_pending_timeout(unsigned long unused)
+static void netbk_tx_pending_timeout(unsigned long data)
{
+ struct xen_netbk *netbk = (struct xen_netbk *)data;
tasklet_schedule(&netbk->net_tx_tasklet);
}
@@ -607,6 +621,7 @@ static int __on_net_schedule_list(struct xen_netif *netif)
static void remove_from_net_schedule_list(struct xen_netif *netif)
{
+ struct xen_netbk *netbk = &xen_netbk[netif->group];
spin_lock_irq(&netbk->net_schedule_list_lock);
if (likely(__on_net_schedule_list(netif))) {
list_del_init(&netif->list);
@@ -617,6 +632,7 @@ static void remove_from_net_schedule_list(struct xen_netif *netif)
static void add_to_net_schedule_list_tail(struct xen_netif *netif)
{
+ struct xen_netbk *netbk = &xen_netbk[netif->group];
if (__on_net_schedule_list(netif))
return;
@@ -631,13 +647,14 @@ static void add_to_net_schedule_list_tail(struct xen_netif *netif)
void netif_schedule_work(struct xen_netif *netif)
{
+ struct xen_netbk *netbk = &xen_netbk[netif->group];
int more_to_do;
RING_FINAL_CHECK_FOR_REQUESTS(&netif->tx, more_to_do);
if (more_to_do) {
add_to_net_schedule_list_tail(netif);
- maybe_schedule_tx_action();
+ maybe_schedule_tx_action(netbk);
}
}
@@ -674,14 +691,15 @@ static void tx_credit_callback(unsigned long data)
netif_schedule_work(netif);
}
-static inline int copy_pending_req(pending_ring_idx_t pending_idx)
+static inline int copy_pending_req(struct xen_netbk *netbk,
+ pending_ring_idx_t pending_idx)
{
return gnttab_copy_grant_page(
netbk->grant_tx_handle[pending_idx],
&netbk->mmap_pages[pending_idx]);
}
-inline static void net_tx_action_dealloc(void)
+static inline void net_tx_action_dealloc(struct xen_netbk *netbk)
{
struct netbk_tx_pending_inuse *inuse, *n;
struct gnttab_unmap_grant_ref *gop;
@@ -711,13 +729,13 @@ inline static void net_tx_action_dealloc(void)
pending_idx = netbk->dealloc_ring[pending_index(dc++)];
list_move_tail(&pending_inuse[pending_idx].list, &list);
- pfn = idx_to_pfn(pending_idx);
+ pfn = idx_to_pfn(netbk, pending_idx);
/* Already unmapped? */
if (!phys_to_machine_mapping_valid(pfn))
continue;
gnttab_set_unmap_op(gop,
- idx_to_kaddr(pending_idx),
+ idx_to_kaddr(netbk, pending_idx),
GNTMAP_host_map,
netbk->grant_tx_handle[pending_idx]);
gop++;
@@ -740,7 +758,7 @@ inline static void net_tx_action_dealloc(void)
pending_tx_info[pending_idx].netif->nr_copied_skbs++;
- switch (copy_pending_req(pending_idx)) {
+ switch (copy_pending_req(netbk, pending_idx)) {
case 0:
list_move_tail(&inuse->list, &list);
continue;
@@ -843,7 +861,8 @@ static int netbk_count_requests(struct xen_netif *netif,
return frags;
}
-static struct gnttab_map_grant_ref *netbk_get_requests(struct xen_netif *netif,
+static struct gnttab_map_grant_ref *netbk_get_requests(struct xen_netbk *netbk,
+ struct xen_netif *netif,
struct sk_buff *skb,
struct xen_netif_tx_request *txp,
struct gnttab_map_grant_ref *mop)
@@ -864,7 +883,7 @@ static struct gnttab_map_grant_ref *netbk_get_requests(struct xen_netif *netif,
index = pending_index(netbk->pending_cons++);
pending_idx = netbk->pending_ring[index];
- gnttab_set_map_op(mop++, idx_to_kaddr(pending_idx),
+ gnttab_set_map_op(mop++, idx_to_kaddr(netbk, pending_idx),
GNTMAP_host_map | GNTMAP_readonly,
txp->gref, netif->domid);
@@ -877,8 +896,9 @@ static struct gnttab_map_grant_ref *netbk_get_requests(struct xen_netif *netif,
return mop;
}
-static int netbk_tx_check_mop(struct sk_buff *skb,
- struct gnttab_map_grant_ref **mopp)
+static int netbk_tx_check_mop(struct xen_netbk *netbk,
+ struct sk_buff *skb,
+ struct gnttab_map_grant_ref **mopp)
{
struct gnttab_map_grant_ref *mop = *mopp;
int pending_idx = *((u16 *)skb->data);
@@ -900,7 +920,7 @@ static int netbk_tx_check_mop(struct sk_buff *skb,
netif_put(netif);
} else {
set_phys_to_machine(
- __pa(idx_to_kaddr(pending_idx)) >> PAGE_SHIFT,
+ __pa(idx_to_kaddr(netbk, pending_idx)) >> PAGE_SHIFT,
FOREIGN_FRAME(mop->dev_bus_addr >> PAGE_SHIFT));
netbk->grant_tx_handle[pending_idx] = mop->handle;
}
@@ -918,14 +938,14 @@ static int netbk_tx_check_mop(struct sk_buff *skb,
newerr = (++mop)->status;
if (likely(!newerr)) {
unsigned long addr;
- addr = idx_to_kaddr(pending_idx);
+ addr = idx_to_kaddr(netbk, pending_idx);
set_phys_to_machine(
__pa(addr)>>PAGE_SHIFT,
FOREIGN_FRAME(mop->dev_bus_addr>>PAGE_SHIFT));
netbk->grant_tx_handle[pending_idx] = mop->handle;
/* Had a previous error? Invalidate this fragment. */
if (unlikely(err))
- netif_idx_release(pending_idx);
+ netif_idx_release(netbk, pending_idx);
continue;
}
@@ -942,10 +962,10 @@ static int netbk_tx_check_mop(struct sk_buff *skb,
/* First error: invalidate header and preceding fragments. */
pending_idx = *((u16 *)skb->data);
- netif_idx_release(pending_idx);
+ netif_idx_release(netbk, pending_idx);
for (j = start; j < i; j++) {
pending_idx = (unsigned long)shinfo->frags[i].page;
- netif_idx_release(pending_idx);
+ netif_idx_release(netbk, pending_idx);
}
/* Remember the error: invalidate all subsequent fragments. */
@@ -956,7 +976,7 @@ static int netbk_tx_check_mop(struct sk_buff *skb,
return err;
}
-static void netbk_fill_frags(struct sk_buff *skb)
+static void netbk_fill_frags(struct xen_netbk *netbk, struct sk_buff *skb)
{
struct skb_shared_info *shinfo = skb_shinfo(skb);
int nr_frags = shinfo->nr_frags;
@@ -974,7 +994,7 @@ static void netbk_fill_frags(struct sk_buff *skb)
&netbk->pending_inuse_head);
txp = &netbk->pending_tx_info[pending_idx].req;
- frag->page = virt_to_page(idx_to_kaddr(pending_idx));
+ frag->page = virt_to_page(idx_to_kaddr(netbk, pending_idx));
frag->size = txp->size;
frag->page_offset = txp->offset;
@@ -1106,14 +1126,14 @@ static bool tx_credit_exceeded(struct xen_netif *netif, unsigned size)
return false;
}
-static unsigned net_tx_build_mops(void)
+static unsigned net_tx_build_mops(struct xen_netbk *netbk)
{
struct gnttab_map_grant_ref *mop;
struct sk_buff *skb;
int ret;
mop = netbk->tx_map_ops;
- while (((nr_pending_reqs() + MAX_SKB_FRAGS) < MAX_PENDING_REQS) &&
+ while (((nr_pending_reqs(netbk) + MAX_SKB_FRAGS) < MAX_PENDING_REQS) &&
!list_empty(&netbk->net_schedule_list)) {
struct xen_netif *netif;
struct xen_netif_tx_request txreq;
@@ -1215,7 +1235,7 @@ static unsigned net_tx_build_mops(void)
}
}
- gnttab_set_map_op(mop, idx_to_kaddr(pending_idx),
+ gnttab_set_map_op(mop, idx_to_kaddr(netbk, pending_idx),
GNTMAP_host_map | GNTMAP_readonly,
txreq.gref, netif->domid);
mop++;
@@ -1241,7 +1261,7 @@ static unsigned net_tx_build_mops(void)
netbk->pending_cons++;
- mop = netbk_get_requests(netif, skb, txfrags, mop);
+ mop = netbk_get_requests(netbk, netif, skb, txfrags, mop);
netif->tx.req_cons = idx;
netif_schedule_work(netif);
@@ -1253,7 +1273,7 @@ static unsigned net_tx_build_mops(void)
return mop - netbk->tx_map_ops;
}
-static void net_tx_submit(void)
+static void net_tx_submit(struct xen_netbk *netbk)
{
struct gnttab_map_grant_ref *mop;
struct sk_buff *skb;
@@ -1270,7 +1290,7 @@ static void net_tx_submit(void)
txp = &netbk->pending_tx_info[pending_idx].req;
/* Check the remap error code. */
- if (unlikely(netbk_tx_check_mop(skb, &mop))) {
+ if (unlikely(netbk_tx_check_mop(netbk, skb, &mop))) {
DPRINTK("netback grant failed.\n");
skb_shinfo(skb)->nr_frags = 0;
kfree_skb(skb);
@@ -1279,7 +1299,7 @@ static void net_tx_submit(void)
data_len = skb->len;
memcpy(skb->data,
- (void *)(idx_to_kaddr(pending_idx)|txp->offset),
+ (void *)(idx_to_kaddr(netbk, pending_idx)|txp->offset),
data_len);
if (data_len < txp->size) {
/* Append the packet payload as a fragment. */
@@ -1287,7 +1307,7 @@ static void net_tx_submit(void)
txp->size -= data_len;
} else {
/* Schedule a response immediately. */
- netif_idx_release(pending_idx);
+ netif_idx_release(netbk, pending_idx);
}
if (txp->flags & NETTXF_csum_blank)
@@ -1295,7 +1315,7 @@ static void net_tx_submit(void)
else if (txp->flags & NETTXF_data_validated)
skb->ip_summed = CHECKSUM_UNNECESSARY;
- netbk_fill_frags(skb);
+ netbk_fill_frags(netbk, skb);
/*
* If the initial fragment was < PKT_PROT_LEN then
@@ -1344,15 +1364,16 @@ static void net_tx_submit(void)
}
/* Called after netfront has transmitted */
-static void net_tx_action(unsigned long unused)
+static void net_tx_action(unsigned long data)
{
+ struct xen_netbk *netbk = (struct xen_netbk *)data;
unsigned nr_mops;
int ret;
if (netbk->dealloc_cons != netbk->dealloc_prod)
- net_tx_action_dealloc();
+ net_tx_action_dealloc(netbk);
- nr_mops = net_tx_build_mops();
+ nr_mops = net_tx_build_mops(netbk);
if (nr_mops == 0)
return;
@@ -1361,10 +1382,10 @@ static void net_tx_action(unsigned long unused)
netbk->tx_map_ops, nr_mops);
BUG_ON(ret);
- net_tx_submit();
+ net_tx_submit(netbk);
}
-static void netif_idx_release(u16 pending_idx)
+static void netif_idx_release(struct xen_netbk *netbk, u16 pending_idx)
{
static DEFINE_SPINLOCK(_lock);
unsigned long flags;
@@ -1383,19 +1404,28 @@ static void netif_idx_release(u16 pending_idx)
static void netif_page_release(struct page *page, unsigned int order)
{
+ int group = netif_page_group(page);
int idx = netif_page_index(page);
+ struct xen_netbk *netbk = &xen_netbk[group];
BUG_ON(order);
+ BUG_ON(group < 0 || group >= xen_netbk_group_nr);
BUG_ON(idx < 0 || idx >= MAX_PENDING_REQS);
BUG_ON(netbk->mmap_pages[idx] != page);
- netif_idx_release(idx);
+ netif_idx_release(netbk, idx);
}
irqreturn_t netif_be_int(int irq, void *dev_id)
{
struct xen_netif *netif = dev_id;
+ struct xen_netbk *netbk;
+
+ if (netif->group == -1)
+ return IRQ_NONE;
+
+ netbk = &xen_netbk[netif->group];
add_to_net_schedule_list_tail(netif);
- maybe_schedule_tx_action();
+ maybe_schedule_tx_action(netbk);
if (netif_schedulable(netif) && !netbk_queue_full(netif))
netif_wake_queue(netif->dev);
@@ -1453,28 +1483,40 @@ static irqreturn_t netif_be_dbg(int irq, void *dev_id, struct pt_regs *regs)
struct list_head *ent;
struct xen_netif *netif;
int i = 0;
+ int group = 0;
printk(KERN_ALERT "netif_schedule_list:\n");
- spin_lock_irq(&netbk->net_schedule_list_lock);
- list_for_each(ent, &netbk->net_schedule_list) {
- netif = list_entry(ent, struct xen_netif, list);
- printk(KERN_ALERT " %d: private(rx_req_cons=%08x "
- "rx_resp_prod=%08x\n",
- i, netif->rx.req_cons, netif->rx.rsp_prod_pvt);
- printk(KERN_ALERT " tx_req_cons=%08x tx_resp_prod=%08x)\n",
- netif->tx.req_cons, netif->tx.rsp_prod_pvt);
- printk(KERN_ALERT " shared(rx_req_prod=%08x "
- "rx_resp_prod=%08x\n",
- netif->rx.sring->req_prod, netif->rx.sring->rsp_prod);
- printk(KERN_ALERT " rx_event=%08x tx_req_prod=%08x\n",
- netif->rx.sring->rsp_event, netif->tx.sring->req_prod);
- printk(KERN_ALERT " tx_resp_prod=%08x, tx_event=%08x)\n",
- netif->tx.sring->rsp_prod, netif->tx.sring->rsp_event);
- i++;
+ for (group = 0; group < xen_netbk_group_nr; group++) {
+ struct xen_netbk *netbk = &xen_netbk[group];
+ spin_lock_irq(&netbk->net_schedule_list_lock);
+ printk(KERN_ALERT "xen_netback group number: %d\n", group);
+ list_for_each(ent, &netbk->net_schedule_list) {
+ netif = list_entry(ent, struct xen_netif, list);
+ printk(KERN_ALERT " %d: private(rx_req_cons=%08x "
+ "rx_resp_prod=%08x\n",
+ i, netif->rx.req_cons, netif->rx.rsp_prod_pvt);
+ printk(KERN_ALERT
+ " tx_req_cons=%08x, tx_resp_prod=%08x)\n",
+ netif->tx.req_cons, netif->tx.rsp_prod_pvt);
+ printk(KERN_ALERT
+ " shared(rx_req_prod=%08x "
+ "rx_resp_prod=%08x\n",
+ netif->rx.sring->req_prod,
+ netif->rx.sring->rsp_prod);
+ printk(KERN_ALERT
+ " rx_event=%08x, tx_req_prod=%08x\n",
+ netif->rx.sring->rsp_event,
+ netif->tx.sring->req_prod);
+ printk(KERN_ALERT
+ " tx_resp_prod=%08x, tx_event=%08x)\n",
+ netif->tx.sring->rsp_prod,
+ netif->tx.sring->rsp_event);
+ i++;
+ }
+ spin_unlock_irq(&netbk->net_schedule_list_lock);
}
- spin_unlock_irq(&netbk->net_schedule_list_lock);
printk(KERN_ALERT " ** End of netif_schedule_list **\n");
return IRQ_HANDLED;
@@ -1486,12 +1528,15 @@ static int __init netback_init(void)
int i;
struct page *page;
int rc = 0;
+ int group;
if (!xen_domain())
return -ENODEV;
- netbk = (struct xen_netbk *)vmalloc(sizeof(struct xen_netbk));
- if (!netbk) {
+ xen_netbk_group_nr = num_online_cpus();
+ xen_netbk = (struct xen_netbk *)vmalloc(sizeof(struct xen_netbk) *
+ xen_netbk_group_nr);
+ if (!xen_netbk) {
printk(KERN_ALERT "%s: out of memory\n", __func__);
return -ENOMEM;
}
@@ -1499,44 +1544,54 @@ static int __init netback_init(void)
/* We can increase reservation by this much in net_rx_action(). */
// balloon_update_driver_allowance(NET_RX_RING_SIZE);
- skb_queue_head_init(&netbk->rx_queue);
- skb_queue_head_init(&netbk->tx_queue);
-
- init_timer(&netbk->net_timer);
- netbk->net_timer.data = 0;
- netbk->net_timer.function = net_alarm;
-
- init_timer(&netbk->netbk_tx_pending_timer);
- netbk->netbk_tx_pending_timer.data = 0;
- netbk->netbk_tx_pending_timer.function = netbk_tx_pending_timeout;
+ for (group = 0; group < xen_netbk_group_nr; group++) {
+ struct xen_netbk *netbk = &xen_netbk[group];
+ skb_queue_head_init(&netbk->rx_queue);
+ skb_queue_head_init(&netbk->tx_queue);
+
+ init_timer(&netbk->net_timer);
+ netbk->net_timer.data = (unsigned long)netbk;
+ netbk->net_timer.function = net_alarm;
+
+ init_timer(&netbk->netbk_tx_pending_timer);
+ netbk->netbk_tx_pending_timer.data = (unsigned long)netbk;
+ netbk->netbk_tx_pending_timer.function =
+ netbk_tx_pending_timeout;
+
+ netbk->mmap_pages =
+ alloc_empty_pages_and_pagevec(MAX_PENDING_REQS);
+ if (!netbk->mmap_pages) {
+ printk(KERN_ALERT "%s: out of memory\n", __func__);
+ del_timer(&netbk->netbk_tx_pending_timer);
+ del_timer(&netbk->net_timer);
+ rc = -ENOMEM;
+ goto failed_init;
+ }
- netbk->mmap_pages =
- alloc_empty_pages_and_pagevec(MAX_PENDING_REQS);
- if (!netbk->mmap_pages) {
- printk(KERN_ALERT "%s: out of memory\n", __func__);
- rc = -ENOMEM;
- goto failed_init2;
- }
+ for (i = 0; i < MAX_PENDING_REQS; i++) {
+ page = netbk->mmap_pages[i];
+ SetPageForeign(page, netif_page_release);
+ netif_set_page_ext(page, group, i);
+ INIT_LIST_HEAD(&netbk->pending_inuse[i].list);
+ }
- for (i = 0; i < MAX_PENDING_REQS; i++) {
- page = netbk->mmap_pages[i];
- SetPageForeign(page, netif_page_release);
- netif_set_page_ext(page, 0, i);
- INIT_LIST_HEAD(&netbk->pending_inuse[i].list);
- }
+ netbk->pending_cons = 0;
+ netbk->pending_prod = MAX_PENDING_REQS;
+ for (i = 0; i < MAX_PENDING_REQS; i++)
+ netbk->pending_ring[i] = i;
- netbk->pending_cons = 0;
- netbk->pending_prod = MAX_PENDING_REQS;
- for (i = 0; i < MAX_PENDING_REQS; i++)
- netbk->pending_ring[i] = i;
+ tasklet_init(&netbk->net_tx_tasklet, net_tx_action,
+ (unsigned long)netbk);
+ tasklet_init(&netbk->net_rx_tasklet, net_rx_action,
+ (unsigned long)netbk);
- tasklet_init(&netbk->net_tx_tasklet, net_tx_action, 0);
- tasklet_init(&netbk->net_rx_tasklet, net_rx_action, 0);
+ INIT_LIST_HEAD(&netbk->pending_inuse_head);
+ INIT_LIST_HEAD(&netbk->net_schedule_list);
- INIT_LIST_HEAD(&netbk->pending_inuse_head);
- INIT_LIST_HEAD(&netbk->net_schedule_list);
+ spin_lock_init(&netbk->net_schedule_list_lock);
- spin_lock_init(&netbk->net_schedule_list_lock);
+ atomic_set(&netbk->netfront_count, 0);
+ }
netbk_copy_skb_mode = NETBK_DONT_COPY_SKB;
if (MODPARM_copy_skb) {
@@ -1551,25 +1606,28 @@ static int __init netback_init(void)
rc = netif_xenbus_init();
if (rc)
- goto failed_init1;
+ goto failed_init;
#ifdef NETBE_DEBUG_INTERRUPT
(void)bind_virq_to_irqhandler(VIRQ_DEBUG,
0,
netif_be_dbg,
- SA_SHIRQ,
+ IRQF_SHARED,
"net-be-dbg",
&netif_be_dbg);
#endif
return 0;
-failed_init1:
- free_empty_pages_and_pagevec(netbk->mmap_pages, MAX_PENDING_REQS);
-failed_init2:
- del_timer(&netbk->netbk_tx_pending_timer);
- del_timer(&netbk->net_timer);
- vfree(netbk);
+failed_init:
+ for (i = 0; i < group; i++) {
+ struct xen_netbk *netbk = &xen_netbk[i];
+ free_empty_pages_and_pagevec(netbk->mmap_pages,
+ MAX_PENDING_REQS);
+ del_timer(&netbk->netbk_tx_pending_timer);
+ del_timer(&netbk->net_timer);
+ }
+ vfree(xen_netbk);
return rc;
}
--
1.7.4
From e7317b70c0436c109b605bb377939cb2eaff6a6f Mon Sep 17 00:00:00 2001
From: Dongxiao Xu <dongxiao.xu@intel.com>
Date: Wed, 19 May 2010 17:08:22 -0700
Subject: [PATCH 097/197] xen: netback: Use Kernel thread to replace the tasklet.
A kernel thread gives more control over QoS and can improve dom0's
userspace responsiveness. This option is currently off by default.
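The patch below selects this mode with a new netback_kthread module
parameter; when it is set, each group's work is driven by a kthread instead
of a tasklet pair. Purely as a hedged, userspace-only sketch (not part of the
patch and not the kernel API), the per-group worker reduces to the familiar
"sleep until there is Rx/Tx work or a stop request, then run the same action
functions" loop:

    /*
     * Userspace sketch only: the shape of the per-group worker loop that
     * replaces the tasklets (the real code sleeps in
     * wait_event_interruptible() and checks kthread_should_stop()).
     */
    #include <stdbool.h>
    #include <stdio.h>

    struct group_state {
    	bool rx_work;
    	bool tx_work;
    	bool stop;
    };

    static void worker(struct group_state *g)
    {
    	while (!g->stop) {
    		/* real driver: block here until woken for rx/tx/stop */
    		if (g->rx_work) {
    			printf("rx action\n");
    			g->rx_work = false;
    		}
    		if (g->tx_work) {
    			printf("tx action\n");
    			g->tx_work = false;
    		}
    		if (!g->rx_work && !g->tx_work)
    			g->stop = true;	/* sketch only: exit once idle */
    	}
    }

    int main(void)
    {
    	struct group_state g = { .rx_work = true, .tx_work = true };

    	worker(&g);
    	return 0;
    }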
Signed-off-by: Dongxiao Xu <dongxiao.xu@intel.com>
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
drivers/xen/netback/common.h | 13 ++++-
drivers/xen/netback/netback.c | 109 ++++++++++++++++++++++++++++++++++++----
2 files changed, 109 insertions(+), 13 deletions(-)
diff --git a/drivers/xen/netback/common.h b/drivers/xen/netback/common.h
index 847ba58..36cb2b9 100644
--- a/drivers/xen/netback/common.h
+++ b/drivers/xen/netback/common.h
@@ -257,8 +257,17 @@ union page_ext {
};
struct xen_netbk {
- struct tasklet_struct net_tx_tasklet;
- struct tasklet_struct net_rx_tasklet;
+ union {
+ struct {
+ struct tasklet_struct net_tx_tasklet;
+ struct tasklet_struct net_rx_tasklet;
+ } tasklet;
+
+ struct {
+ wait_queue_head_t netbk_action_wq;
+ struct task_struct *task;
+ } kthread;
+ };
struct sk_buff_head rx_queue;
struct sk_buff_head tx_queue;
diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
index feefb14..547dcaa 100644
--- a/drivers/xen/netback/netback.c
+++ b/drivers/xen/netback/netback.c
@@ -38,6 +38,7 @@
#include <linux/tcp.h>
#include <linux/udp.h>
+#include <linux/kthread.h>
#include <xen/balloon.h>
#include <xen/events.h>
@@ -128,12 +129,31 @@ MODULE_PARM_DESC(copy_skb, "Copy data received from netfront without netloop");
int netbk_copy_skb_mode;
+static int MODPARM_netback_kthread;
+module_param_named(netback_kthread, MODPARM_netback_kthread, bool, 0);
+MODULE_PARM_DESC(netback_kthread, "Use kernel thread to replace tasklet");
+
+/*
+ * Netback bottom half handler.
+ * dir indicates the data direction.
+ * rx: 1, tx: 0.
+ */
+static inline void xen_netbk_bh_handler(struct xen_netbk *netbk, int dir)
+{
+ if (MODPARM_netback_kthread)
+ wake_up(&netbk->kthread.netbk_action_wq);
+ else if (dir)
+ tasklet_schedule(&netbk->tasklet.net_rx_tasklet);
+ else
+ tasklet_schedule(&netbk->tasklet.net_tx_tasklet);
+}
+
static inline void maybe_schedule_tx_action(struct xen_netbk *netbk)
{
smp_mb();
if ((nr_pending_reqs(netbk) < (MAX_PENDING_REQS/2)) &&
!list_empty(&netbk->net_schedule_list))
- tasklet_schedule(&netbk->net_tx_tasklet);
+ xen_netbk_bh_handler(netbk, 0);
}
static struct sk_buff *netbk_copy_skb(struct sk_buff *skb)
@@ -289,7 +309,8 @@ int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev)
}
}
skb_queue_tail(&netbk->rx_queue, skb);
- tasklet_schedule(&netbk->net_rx_tasklet);
+
+ xen_netbk_bh_handler(netbk, 1);
return 0;
@@ -593,19 +614,19 @@ static void net_rx_action(unsigned long data)
/* More work to do? */
if (!skb_queue_empty(&netbk->rx_queue) &&
!timer_pending(&netbk->net_timer))
- tasklet_schedule(&netbk->net_rx_tasklet);
+ xen_netbk_bh_handler(netbk, 1);
}
static void net_alarm(unsigned long data)
{
struct xen_netbk *netbk = (struct xen_netbk *)data;
- tasklet_schedule(&netbk->net_rx_tasklet);
+ xen_netbk_bh_handler(netbk, 1);
}
static void netbk_tx_pending_timeout(unsigned long data)
{
struct xen_netbk *netbk = (struct xen_netbk *)data;
- tasklet_schedule(&netbk->net_tx_tasklet);
+ xen_netbk_bh_handler(netbk, 0);
}
struct net_device_stats *netif_be_get_stats(struct net_device *dev)
@@ -1348,7 +1369,7 @@ static void net_tx_submit(struct xen_netbk *netbk)
continue;
}
- netif_rx(skb);
+ netif_rx_ni(skb);
netif->dev->last_rx = jiffies;
}
@@ -1399,7 +1420,7 @@ static void netif_idx_release(struct xen_netbk *netbk, u16 pending_idx)
netbk->dealloc_prod++;
spin_unlock_irqrestore(&_lock, flags);
- tasklet_schedule(&netbk->net_tx_tasklet);
+ xen_netbk_bh_handler(netbk, 0);
}
static void netif_page_release(struct page *page, unsigned int order)
@@ -1523,6 +1544,46 @@ static irqreturn_t netif_be_dbg(int irq, void *dev_id, struct pt_regs *regs)
}
#endif
+static inline int rx_work_todo(struct xen_netbk *netbk)
+{
+ return !skb_queue_empty(&netbk->rx_queue);
+}
+
+static inline int tx_work_todo(struct xen_netbk *netbk)
+{
+ if (netbk->dealloc_cons != netbk->dealloc_prod)
+ return 1;
+
+ if (((nr_pending_reqs(netbk) + MAX_SKB_FRAGS) < MAX_PENDING_REQS) &&
+ !list_empty(&netbk->net_schedule_list))
+ return 1;
+
+ return 0;
+}
+
+static int netbk_action_thread(void *data)
+{
+ struct xen_netbk *netbk = (struct xen_netbk *)data;
+ while (!kthread_should_stop()) {
+ wait_event_interruptible(netbk->kthread.netbk_action_wq,
+ rx_work_todo(netbk)
+ || tx_work_todo(netbk)
+ || kthread_should_stop());
+ cond_resched();
+
+ if (kthread_should_stop())
+ break;
+
+ if (rx_work_todo(netbk))
+ net_rx_action((unsigned long)netbk);
+
+ if (tx_work_todo(netbk))
+ net_tx_action((unsigned long)netbk);
+ }
+
+ return 0;
+}
+
static int __init netback_init(void)
{
int i;
@@ -1580,10 +1641,34 @@ static int __init netback_init(void)
for (i = 0; i < MAX_PENDING_REQS; i++)
netbk->pending_ring[i] = i;
- tasklet_init(&netbk->net_tx_tasklet, net_tx_action,
- (unsigned long)netbk);
- tasklet_init(&netbk->net_rx_tasklet, net_rx_action,
- (unsigned long)netbk);
+ if (MODPARM_netback_kthread) {
+ init_waitqueue_head(&netbk->kthread.netbk_action_wq);
+ netbk->kthread.task =
+ kthread_create(netbk_action_thread,
+ (void *)netbk,
+ "netback/%u", group);
+
+ if (!IS_ERR(netbk->kthread.task)) {
+ kthread_bind(netbk->kthread.task, group);
+ wake_up_process(netbk->kthread.task);
+ } else {
+ printk(KERN_ALERT
+ "kthread_run() fails at netback\n");
+ free_empty_pages_and_pagevec(netbk->mmap_pages,
+ MAX_PENDING_REQS);
+ del_timer(&netbk->netbk_tx_pending_timer);
+ del_timer(&netbk->net_timer);
+ rc = PTR_ERR(netbk->kthread.task);
+ goto failed_init;
+ }
+ } else {
+ tasklet_init(&netbk->tasklet.net_tx_tasklet,
+ net_tx_action,
+ (unsigned long)netbk);
+ tasklet_init(&netbk->tasklet.net_rx_tasklet,
+ net_rx_action,
+ (unsigned long)netbk);
+ }
INIT_LIST_HEAD(&netbk->pending_inuse_head);
INIT_LIST_HEAD(&netbk->net_schedule_list);
@@ -1626,6 +1711,8 @@ failed_init:
MAX_PENDING_REQS);
del_timer(&netbk->netbk_tx_pending_timer);
del_timer(&netbk->net_timer);
+ if (MODPARM_netback_kthread)
+ kthread_stop(netbk->kthread.task);
}
vfree(xen_netbk);
return rc;
--
1.7.4
From 6359d5939c5d1f59b794cd02e8cdbd36b9f3434d Mon Sep 17 00:00:00 2001
From: James Harper <james.harper@bendigoit.com.au>
Date: Fri, 28 May 2010 23:12:56 -0700
Subject: [PATCH 098/197] xen: netback: avoid null-pointer access in netback_uevent
Check if drvdata has been set up yet and return if it hasn't.
Signed-off-by: James Harper <james.harper@bendigoit.com.au>
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
drivers/xen/netback/xenbus.c | 9 +++++++--
1 files changed, 7 insertions(+), 2 deletions(-)
diff --git a/drivers/xen/netback/xenbus.c b/drivers/xen/netback/xenbus.c
index fcd3c34..e30b0c7 100644
--- a/drivers/xen/netback/xenbus.c
+++ b/drivers/xen/netback/xenbus.c
@@ -154,12 +154,17 @@ fail:
*/
static int netback_uevent(struct xenbus_device *xdev, struct kobj_uevent_env *env)
{
- struct backend_info *be = dev_get_drvdata(&xdev->dev);
- struct xen_netif *netif = be->netif;
+ struct backend_info *be;
+ struct xen_netif *netif;
char *val;
DPRINTK("netback_uevent");
+ be = dev_get_drvdata(&xdev->dev);
+ if (!be)
+ return 0;
+ netif = be->netif;
+
val = xenbus_read(XBT_NIL, xdev->nodename, "script", NULL);
if (IS_ERR(val)) {
int err = PTR_ERR(val);
--
1.7.4
From 4a818daa044d9d499412e8f6e2e3086c0521e7b3 Mon Sep 17 00:00:00 2001
From: Keir Fraser <keir.fraser@citrix.com>
Date: Fri, 11 Jun 2010 11:48:30 +0100
Subject: [PATCH 099/197] xen: netback: Fixes for delayed copy of tx network packets.
- Should call net_tx_action_dealloc() even when the dealloc ring is
empty, as there may in any case be work to do on the
pending_inuse list.
- Should not exit directly from the middle of the tx_action tasklet,
as the tx_pending_timer should always be checked and updated at the
end of the tasklet.
Signed-off-by: Keir Fraser <keir.fraser@citrix.com>
Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
[picked from linux-2.6.18-xen.hg 959:1a97bd686258, ported across a43e2175 "xen/netback: move code around"]
---
drivers/xen/netback/netback.c | 25 ++++++++++++-------------
1 files changed, 12 insertions(+), 13 deletions(-)
diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
index 547dcaa..58dfbd2 100644
--- a/drivers/xen/netback/netback.c
+++ b/drivers/xen/netback/netback.c
@@ -1372,16 +1372,6 @@ static void net_tx_submit(struct xen_netbk *netbk)
netif_rx_ni(skb);
netif->dev->last_rx = jiffies;
}
-
- if (netbk_copy_skb_mode == NETBK_DELAYED_COPY_SKB &&
- !list_empty(&netbk->pending_inuse_head)) {
- struct netbk_tx_pending_inuse *oldest;
-
- oldest = list_entry(netbk->pending_inuse_head.next,
- struct netbk_tx_pending_inuse, list);
- mod_timer(&netbk->netbk_tx_pending_timer,
- oldest->alloc_time + HZ);
- }
}
/* Called after netfront has transmitted */
@@ -1391,19 +1381,28 @@ static void net_tx_action(unsigned long data)
unsigned nr_mops;
int ret;
- if (netbk->dealloc_cons != netbk->dealloc_prod)
- net_tx_action_dealloc(netbk);
+ net_tx_action_dealloc(netbk);
nr_mops = net_tx_build_mops(netbk);
if (nr_mops == 0)
- return;
+ goto out;
ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref,
netbk->tx_map_ops, nr_mops);
BUG_ON(ret);
net_tx_submit(netbk);
+out:
+ if (netbk_copy_skb_mode == NETBK_DELAYED_COPY_SKB &&
+ !list_empty(&netbk->pending_inuse_head)) {
+ struct netbk_tx_pending_inuse *oldest;
+
+ oldest = list_entry(netbk->pending_inuse_head.next,
+ struct netbk_tx_pending_inuse, list);
+ mod_timer(&netbk->netbk_tx_pending_timer,
+ oldest->alloc_time + HZ);
+ }
}
static void netif_idx_release(struct xen_netbk *netbk, u16 pending_idx)
--
1.7.4
From 48fa1af97e6c9d304c04f70a75de1340e7d79e18 Mon Sep 17 00:00:00 2001
From: Ian Campbell <ian.campbell@citrix.com>
Date: Fri, 11 Jun 2010 10:51:01 +0100
Subject: [PATCH 100/197] xen: netback: handle NET_SKBUFF_DATA_USES_OFFSET correctly
Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
Cc: Jan Beulich <JBeulich@novell.com>
---
drivers/xen/netback/netback.c | 4 ++++
1 files changed, 4 insertions(+), 0 deletions(-)
diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
index 58dfbd2..aa094af 100644
--- a/drivers/xen/netback/netback.c
+++ b/drivers/xen/netback/netback.c
@@ -218,7 +218,11 @@ static struct sk_buff *netbk_copy_skb(struct sk_buff *skb)
len -= copy;
}
+#ifdef NET_SKBUFF_DATA_USES_OFFSET
+ offset = 0;
+#else
offset = nskb->data - skb->data;
+#endif
nskb->transport_header = skb->transport_header + offset;
nskb->network_header = skb->network_header + offset;
--
1.7.4
From 7d3e6e42251f179e407fa5236f613e5500b3a3ea Mon Sep 17 00:00:00 2001
From: Ian Campbell <ian.campbell@citrix.com>
Date: Fri, 11 Jun 2010 10:51:01 +0100
Subject: [PATCH 101/197] xen: netback: drop frag member from struct netbk_rx_meta
It has been unused since c3219dc "xen/netback: completely drop flip
support", as has netbk_free_pages().
(Although struct netbk_rx_meta now has only a single member, it will
gain other members in a subsequent patch, so there is no point in
reworking the code to get rid of the struct.)
Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
---
drivers/xen/netback/common.h | 1 -
drivers/xen/netback/netback.c | 8 --------
2 files changed, 0 insertions(+), 9 deletions(-)
diff --git a/drivers/xen/netback/common.h b/drivers/xen/netback/common.h
index 36cb2b9..be4fe91 100644
--- a/drivers/xen/netback/common.h
+++ b/drivers/xen/netback/common.h
@@ -230,7 +230,6 @@ struct pending_tx_info {
typedef unsigned int pending_ring_idx_t;
struct netbk_rx_meta {
- skb_frag_t frag;
int id;
};
diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
index aa094af..9f7e489 100644
--- a/drivers/xen/netback/netback.c
+++ b/drivers/xen/netback/netback.c
@@ -411,14 +411,6 @@ static void netbk_gop_skb(struct sk_buff *skb,
netif->rx.req_cons += nr_frags + extra;
}
-static inline void netbk_free_pages(int nr_frags, struct netbk_rx_meta *meta)
-{
- int i;
-
- for (i = 0; i < nr_frags; i++)
- put_page(meta[i].frag.page);
-}
-
/* This is a twin to netbk_gop_skb. Assume that netbk_gop_skb was
used to set up the operations on the top of
netrx_pending_operations, which have since been done. Check that
--
1.7.4
From 1ced27150d0092c40ebbbbb3896192003d433c0e Mon Sep 17 00:00:00 2001
From: Ian Campbell <ian.campbell@citrix.com>
Date: Fri, 11 Jun 2010 10:51:01 +0100
Subject: [PATCH 102/197] xen: netback: linearise SKBs as we copy them into guest memory on guest-RX.
There's no point in sending lots of little packets to a copying
receiver if we can instead arrange to copy them all into a single RX
buffer. We need to copy anyway, so there's no overhead here, and this
is a little bit easier on the receiving domain's network stack.
Based on a patch by Steven Smith. Fixed to not skip unnecessarily to
the next buffer which could leave the head fragment of a received
frame empty if the headlen of an SKB was large (which would crash
netfront). Instead we only try to pack "small enough" fragments
together, but do not try to coalesce large or whole-page fragments.
In previous iterations of this patch we also tried to only include
2048 bytes per frag because very old netfronts stored other
information in the second half of the page. It has been determined
that only frontends which support scatter-gather are going to come
down this path and that any guest which supports scatter-gather is
also new enough to allow us to use the full page size for each
fragment (since this limitation was fixed as part of the SG
implementation), so we do not need this restriction.
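To make the packing rule concrete, here is a hedged, userspace-only sketch
(not part of the patch) of the slot counting it implies. It mirrors the
count_skb_slots()/netbk_gop_frag_copy() arithmetic added below, assuming
MAX_BUFFER_OFFSET is 4096 and ignoring the extra slot reserved for GSO.

    /*
     * Sketch only: count how many receive buffers a frame needs under the
     * packing rules described above.
     */
    #include <stdio.h>

    #define MAX_BUFFER_OFFSET 4096u

    static unsigned count_slots(unsigned headlen, const unsigned *frags,
    			    int nr_frags)
    {
    	unsigned count = 1;          /* the head buffer */
    	unsigned copy_off = headlen; /* bytes already in current buffer */
    	int i;

    	for (i = 0; i < nr_frags; i++) {
    		unsigned size = frags[i];

    		while (size > 0) {
    			unsigned bytes;

    			/* Start a new buffer if the current one is full, or
    			 * if this frag would fit whole in the next buffer
    			 * and the current buffer already holds some data. */
    			if (copy_off == MAX_BUFFER_OFFSET ||
    			    (copy_off + size > MAX_BUFFER_OFFSET &&
    			     size <= MAX_BUFFER_OFFSET && copy_off)) {
    				count++;
    				copy_off = 0;
    			}

    			bytes = size;
    			if (copy_off + bytes > MAX_BUFFER_OFFSET)
    				bytes = MAX_BUFFER_OFFSET - copy_off;

    			copy_off += bytes;
    			size -= bytes;
    		}
    	}
    	return count;
    }

    int main(void)
    {
    	/* e.g. a 200-byte header plus 1000-byte and 5000-byte fragments:
    	 * the small frag is packed with the header, the large one is
    	 * split, giving 2 slots in total. */
    	unsigned frags[] = { 1000, 5000 };

    	printf("slots needed: %u\n", count_slots(200, frags, 2));
    	return 0;
    }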
Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
Cc: Steven Smith <Steven.Smith@eu.citrix.com>
---
drivers/xen/netback/common.h | 15 ++-
drivers/xen/netback/netback.c | 282 ++++++++++++++++++++++++++++++-----------
2 files changed, 218 insertions(+), 79 deletions(-)
diff --git a/drivers/xen/netback/common.h b/drivers/xen/netback/common.h
index be4fe91..9c0c048 100644
--- a/drivers/xen/netback/common.h
+++ b/drivers/xen/netback/common.h
@@ -82,7 +82,9 @@ struct xen_netif {
/* Internal feature information. */
u8 can_queue:1; /* can queue packets for receiver? */
- /* Allow netif_be_start_xmit() to peek ahead in the rx request ring. */
+ /* Allow netif_be_start_xmit() to peek ahead in the rx request
+ * ring. This is a prediction of what rx_req_cons will be once
+ * all queued skbs are put on the ring. */
RING_IDX rx_req_cons_peek;
/* Transmit shaping: allow 'credit_bytes' every 'credit_usec'. */
@@ -231,6 +233,8 @@ typedef unsigned int pending_ring_idx_t;
struct netbk_rx_meta {
int id;
+ int size;
+ int gso_size;
};
struct netbk_tx_pending_inuse {
@@ -240,6 +244,8 @@ struct netbk_tx_pending_inuse {
#define MAX_PENDING_REQS 256
+#define MAX_BUFFER_OFFSET PAGE_SIZE
+
/* extra field used in struct page */
union page_ext {
struct {
@@ -301,7 +307,12 @@ struct xen_netbk {
struct multicall_entry rx_mcl[NET_RX_RING_SIZE+3];
struct mmu_update rx_mmu[NET_RX_RING_SIZE];
struct gnttab_transfer grant_trans_op[NET_RX_RING_SIZE];
- struct gnttab_copy grant_copy_op[NET_RX_RING_SIZE];
+ /*
+ * Each head or fragment can be up to 4096 bytes. Given
+ * MAX_BUFFER_OFFSET of 4096 the worst case is that each
+ * head/fragment uses 2 copy operation.
+ */
+ struct gnttab_copy grant_copy_op[2*NET_RX_RING_SIZE];
unsigned char rx_notify[NR_IRQS];
u16 notify_list[NET_RX_RING_SIZE];
struct netbk_rx_meta meta[NET_RX_RING_SIZE];
diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
index 9f7e489..d53d88e 100644
--- a/drivers/xen/netback/netback.c
+++ b/drivers/xen/netback/netback.c
@@ -259,6 +259,48 @@ static void tx_queue_callback(unsigned long data)
netif_wake_queue(netif->dev);
}
+/* Figure out how many ring slots we're going to need to send @skb to
+ the guest. */
+static unsigned count_skb_slots(struct sk_buff *skb, struct xen_netif *netif)
+{
+ unsigned count;
+ unsigned copy_off;
+ unsigned i;
+
+ copy_off = 0;
+ count = 1;
+
+ BUG_ON(offset_in_page(skb->data) + skb_headlen(skb) > MAX_BUFFER_OFFSET);
+
+ copy_off = skb_headlen(skb);
+
+ if (skb_shinfo(skb)->gso_size)
+ count++;
+
+ for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+ unsigned long size = skb_shinfo(skb)->frags[i].size;
+ unsigned long bytes;
+ while (size > 0) {
+ BUG_ON(copy_off > MAX_BUFFER_OFFSET);
+
+ /* These checks are the same as in netbk_gop_frag_copy */
+ if (copy_off == MAX_BUFFER_OFFSET
+ || ((copy_off + size > MAX_BUFFER_OFFSET) && (size <= MAX_BUFFER_OFFSET) && copy_off)) {
+ count++;
+ copy_off = 0;
+ }
+
+ bytes = size;
+ if (copy_off + bytes > MAX_BUFFER_OFFSET)
+ bytes = MAX_BUFFER_OFFSET - copy_off;
+
+ copy_off += bytes;
+ size -= bytes;
+ }
+ }
+ return count;
+}
+
int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev)
{
struct xen_netif *netif = netdev_priv(dev);
@@ -290,8 +332,9 @@ int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev)
skb = nskb;
}
- netif->rx_req_cons_peek += skb_shinfo(skb)->nr_frags + 1 +
- !!skb_shinfo(skb)->gso_size;
+ /* Reserve ring slots for the worst-case number of
+ * fragments. */
+ netif->rx_req_cons_peek += count_skb_slots(skb, netif);
netif_get(netif);
if (netbk_can_queue(dev) && netbk_queue_full(netif)) {
@@ -335,96 +378,165 @@ struct netrx_pending_operations {
struct gnttab_copy *copy;
struct multicall_entry *mcl;
struct netbk_rx_meta *meta;
+ int copy_off;
+ grant_ref_t copy_gref;
};
/* Set up the grant operations for this fragment. If it's a flipping
interface, we also set up the unmap request from here. */
-static u16 netbk_gop_frag(struct xen_netif *netif, struct netbk_rx_meta *meta,
- int i, struct netrx_pending_operations *npo,
- struct page *page, unsigned long size,
- unsigned long offset)
+
+static void netbk_gop_frag_copy(struct xen_netif *netif,
+ struct netrx_pending_operations *npo,
+ struct page *page, unsigned long size,
+ unsigned long offset, int head)
{
struct gnttab_copy *copy_gop;
- struct xen_netif_rx_request *req;
- unsigned long old_mfn;
+ struct netbk_rx_meta *meta;
int group = netif_page_group(page);
int idx = netif_page_index(page);
+ unsigned long bytes;
+
+ /* Data must not cross a page boundary. */
+ BUG_ON(size + offset > PAGE_SIZE);
- old_mfn = virt_to_mfn(page_address(page));
+ meta = npo->meta + npo->meta_prod - 1;
- req = RING_GET_REQUEST(&netif->rx, netif->rx.req_cons + i);
+ while (size > 0) {
+ BUG_ON(npo->copy_off > MAX_BUFFER_OFFSET);
- copy_gop = npo->copy + npo->copy_prod++;
- copy_gop->flags = GNTCOPY_dest_gref;
- if (PageForeign(page)) {
+ /*
+ * Move to a new receive buffer if:
+ *
+ * simple case: we have completely filled the current buffer.
+ *
+ * complex case: the current frag would overflow
+ * the current buffer but only if:
+ * (i) this frag would fit completely in the next buffer
+ * and (ii) there is already some data in the current buffer
+ * and (iii) this is not the head buffer.
+ *
+ * Where:
+ * - (i) stops us splitting a frag into two copies
+ * unless the frag is too large for a single buffer.
+ * - (ii) stops us from leaving a buffer pointlessly empty.
+ * - (iii) stops us leaving the first buffer
+ * empty. Strictly speaking this is already covered
+ * by (ii) but is explicitly checked because
+ * netfront relies on the first buffer being
+ * non-empty and can crash otherwise.
+ *
+ * This means we will effectively linearise small
+ * frags but do not needlessly split large buffers
+ * into multiple copies tend to give large frags their
+ * own buffers as before.
+ */
+ if (npo->copy_off == MAX_BUFFER_OFFSET
+ || ((npo->copy_off + size > MAX_BUFFER_OFFSET) && (size <= MAX_BUFFER_OFFSET) && npo->copy_off && !head)) {
+ struct xen_netif_rx_request *req;
+
+ BUG_ON(head); /* Netfront requires there to be some data in the head buffer. */
+ /* Overflowed this request, go to the next one */
+ req = RING_GET_REQUEST(&netif->rx, netif->rx.req_cons++);
+ meta = npo->meta + npo->meta_prod++;
+ meta->size = 0;
+ meta->id = req->id;
+ npo->copy_off = 0;
+ npo->copy_gref = req->gref;
+ }
+
+ bytes = size;
+ if (npo->copy_off + bytes > MAX_BUFFER_OFFSET)
+ bytes = MAX_BUFFER_OFFSET - npo->copy_off;
+
+ copy_gop = npo->copy + npo->copy_prod++;
+ copy_gop->flags = GNTCOPY_dest_gref;
+ if (PageForeign(page)) {
struct xen_netbk *netbk = &xen_netbk[group];
struct pending_tx_info *src_pend = &netbk->pending_tx_info[idx];
copy_gop->source.domid = src_pend->netif->domid;
copy_gop->source.u.ref = src_pend->req.gref;
- copy_gop->flags |= GNTCOPY_source_gref;
- } else {
- copy_gop->source.domid = DOMID_SELF;
- copy_gop->source.u.gmfn = old_mfn;
- }
- copy_gop->source.offset = offset;
- copy_gop->dest.domid = netif->domid;
- copy_gop->dest.offset = 0;
- copy_gop->dest.u.ref = req->gref;
- copy_gop->len = size;
+ copy_gop->flags |= GNTCOPY_source_gref;
+ } else {
+ copy_gop->source.domid = DOMID_SELF;
+ copy_gop->source.u.gmfn = virt_to_mfn(page_address(page));
+ }
+ copy_gop->source.offset = offset;
+ copy_gop->dest.domid = netif->domid;
- return req->id;
+ copy_gop->dest.offset = npo->copy_off;
+ copy_gop->dest.u.ref = npo->copy_gref;
+ copy_gop->len = bytes;
+
+ npo->copy_off += bytes;
+ meta->size += bytes;
+
+ offset += bytes;
+ size -= bytes;
+ head = 0; /* Must be something in this buffer now */
+ }
}
-static void netbk_gop_skb(struct sk_buff *skb,
- struct netrx_pending_operations *npo)
+/* Prepare an SKB to be transmitted to the frontend. This is
+ responsible for allocating grant operations, meta structures, etc.
+ It returns the number of meta structures consumed. The number of
+ ring slots used is always equal to the number of meta slots used
+ plus the number of GSO descriptors used. Currently, we use either
+ zero GSO descriptors (for non-GSO packets) or one descriptor (for
+ frontend-side LRO). */
+static int netbk_gop_skb(struct sk_buff *skb,
+ struct netrx_pending_operations *npo)
{
struct xen_netif *netif = netdev_priv(skb->dev);
int nr_frags = skb_shinfo(skb)->nr_frags;
int i;
- int extra;
- struct netbk_rx_meta *head_meta, *meta;
+ struct xen_netif_rx_request *req;
+ struct netbk_rx_meta *meta;
+ int old_meta_prod;
+
+ old_meta_prod = npo->meta_prod;
- head_meta = npo->meta + npo->meta_prod++;
- head_meta->frag.page_offset = skb_shinfo(skb)->gso_type;
- head_meta->frag.size = skb_shinfo(skb)->gso_size;
- extra = !!head_meta->frag.size + 1;
+ req = RING_GET_REQUEST(&netif->rx, netif->rx.req_cons++);
+ meta = npo->meta + npo->meta_prod++;
+ meta->gso_size = skb_shinfo(skb)->gso_size;
+ meta->size = 0;
+ meta->id = req->id;
+ npo->copy_off = 0;
+ npo->copy_gref = req->gref;
+
+ netbk_gop_frag_copy(netif,
+ npo, virt_to_page(skb->data),
+ skb_headlen(skb),
+ offset_in_page(skb->data), 1);
+
+ /* Leave a gap for the GSO descriptor. */
+ if (skb_shinfo(skb)->gso_size)
+ netif->rx.req_cons++;
for (i = 0; i < nr_frags; i++) {
- meta = npo->meta + npo->meta_prod++;
- meta->frag = skb_shinfo(skb)->frags[i];
- meta->id = netbk_gop_frag(netif, meta, i + extra, npo,
- meta->frag.page,
- meta->frag.size,
- meta->frag.page_offset);
+ netbk_gop_frag_copy(netif, npo,
+ skb_shinfo(skb)->frags[i].page,
+ skb_shinfo(skb)->frags[i].size,
+ skb_shinfo(skb)->frags[i].page_offset,
+ 0);
}
- /*
- * This must occur at the end to ensure that we don't trash skb_shinfo
- * until we're done. We know that the head doesn't cross a page
- * boundary because such packets get copied in netif_be_start_xmit.
- */
- head_meta->id = netbk_gop_frag(netif, head_meta, 0, npo,
- virt_to_page(skb->data),
- skb_headlen(skb),
- offset_in_page(skb->data));
-
- netif->rx.req_cons += nr_frags + extra;
+ return npo->meta_prod - old_meta_prod;
}
/* This is a twin to netbk_gop_skb. Assume that netbk_gop_skb was
used to set up the operations on the top of
netrx_pending_operations, which have since been done. Check that
they didn't give any errors and advance over them. */
-static int netbk_check_gop(int nr_frags, domid_t domid,
+static int netbk_check_gop(int nr_meta_slots, domid_t domid,
struct netrx_pending_operations *npo)
{
struct gnttab_copy *copy_op;
int status = NETIF_RSP_OKAY;
int i;
- for (i = 0; i <= nr_frags; i++) {
- copy_op = npo->copy + npo->copy_cons++;
- if (copy_op->status != GNTST_okay) {
+ for (i = 0; i < nr_meta_slots; i++) {
+ copy_op = npo->copy + npo->copy_cons++;
+ if (copy_op->status != GNTST_okay) {
DPRINTK("Bad status %d from copy to DOM%d.\n",
copy_op->status, domid);
status = NETIF_RSP_ERROR;
@@ -435,27 +547,35 @@ static int netbk_check_gop(int nr_frags, domid_t domid,
}
static void netbk_add_frag_responses(struct xen_netif *netif, int status,
- struct netbk_rx_meta *meta, int nr_frags)
+ struct netbk_rx_meta *meta,
+ int nr_meta_slots)
{
int i;
unsigned long offset;
- for (i = 0; i < nr_frags; i++) {
- int id = meta[i].id;
- int flags = (i == nr_frags - 1) ? 0 : NETRXF_more_data;
-
+ for (i = 0; i < nr_meta_slots; i++) {
+ int flags;
+ if (i == nr_meta_slots - 1)
+ flags = 0;
+ else
+ flags = NETRXF_more_data;
+
offset = 0;
- make_rx_response(netif, id, status, offset,
- meta[i].frag.size, flags);
+ make_rx_response(netif, meta[i].id, status, offset,
+ meta[i].size, flags);
}
}
+struct skb_cb_overlay {
+ int meta_slots_used;
+};
+
static void net_rx_action(unsigned long data)
{
struct xen_netif *netif = NULL;
struct xen_netbk *netbk = (struct xen_netbk *)data;
s8 status;
- u16 id, irq, flags;
+ u16 irq, flags;
struct xen_netif_rx_response *resp;
struct multicall_entry *mcl;
struct sk_buff_head rxq;
@@ -465,6 +585,7 @@ static void net_rx_action(unsigned long data)
int nr_frags;
int count;
unsigned long offset;
+ struct skb_cb_overlay *sco;
struct netrx_pending_operations npo = {
.mmu = netbk->rx_mmu,
@@ -479,10 +600,11 @@ static void net_rx_action(unsigned long data)
count = 0;
while ((skb = skb_dequeue(&netbk->rx_queue)) != NULL) {
+ netif = netdev_priv(skb->dev);
nr_frags = skb_shinfo(skb)->nr_frags;
- *(int *)skb->cb = nr_frags;
- netbk_gop_skb(skb, &npo);
+ sco = (struct skb_cb_overlay *)skb->cb;
+ sco->meta_slots_used = netbk_gop_skb(skb, &npo);
count += nr_frags + 1;
@@ -541,18 +663,20 @@ static void net_rx_action(unsigned long data)
BUG_ON(npo.mmu_mcl && npo.mcl[npo.mmu_mcl].result != 0);
while ((skb = __skb_dequeue(&rxq)) != NULL) {
- nr_frags = *(int *)skb->cb;
+ sco = (struct skb_cb_overlay *)skb->cb;
netif = netdev_priv(skb->dev);
netif->stats.tx_bytes += skb->len;
netif->stats.tx_packets++;
- status = netbk_check_gop(nr_frags, netif->domid, &npo);
-
- id = netbk->meta[npo.meta_cons].id;
- flags = nr_frags ? NETRXF_more_data : 0;
+ status = netbk_check_gop(sco->meta_slots_used,
+ netif->domid, &npo);
+ if (sco->meta_slots_used == 1)
+ flags = 0;
+ else
+ flags = NETRXF_more_data;
if (skb->ip_summed == CHECKSUM_PARTIAL) /* local packet? */
flags |= NETRXF_csum_blank | NETRXF_data_validated;
else if (skb->ip_summed == CHECKSUM_UNNECESSARY)
@@ -560,10 +684,12 @@ static void net_rx_action(unsigned long data)
flags |= NETRXF_data_validated;
offset = 0;
- resp = make_rx_response(netif, id, status, offset,
- skb_headlen(skb), flags);
+ resp = make_rx_response(netif, netbk->meta[npo.meta_cons].id,
+ status, offset,
+ netbk->meta[npo.meta_cons].size,
+ flags);
- if (netbk->meta[npo.meta_cons].frag.size) {
+ if (netbk->meta[npo.meta_cons].gso_size) {
struct xen_netif_extra_info *gso =
(struct xen_netif_extra_info *)
RING_GET_RESPONSE(&netif->rx,
@@ -571,7 +697,7 @@ static void net_rx_action(unsigned long data)
resp->flags |= NETRXF_extra_info;
- gso->u.gso.size = netbk->meta[npo.meta_cons].frag.size;
+ gso->u.gso.size = netbk->meta[npo.meta_cons].gso_size;
gso->u.gso.type = XEN_NETIF_GSO_TYPE_TCPV4;
gso->u.gso.pad = 0;
gso->u.gso.features = 0;
@@ -580,9 +706,11 @@ static void net_rx_action(unsigned long data)
gso->flags = 0;
}
- netbk_add_frag_responses(netif, status,
- netbk->meta + npo.meta_cons + 1,
- nr_frags);
+ if (sco->meta_slots_used > 1) {
+ netbk_add_frag_responses(netif, status,
+ netbk->meta + npo.meta_cons + 1,
+ sco->meta_slots_used - 1);
+ }
RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&netif->rx, ret);
irq = netif->irq;
@@ -597,8 +725,8 @@ static void net_rx_action(unsigned long data)
netif_wake_queue(netif->dev);
netif_put(netif);
+ npo.meta_cons += sco->meta_slots_used;
dev_kfree_skb(skb);
- npo.meta_cons += nr_frags + 1;
}
while (notify_nr != 0) {
--
1.7.4
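
As a rough illustration of the slot accounting introduced above, the following
stand-alone sketch mirrors the buffer-advance rule shared by count_skb_slots()
and netbk_gop_frag_copy(). It is not code from this series; MAX_BUFFER_OFFSET
and the example sizes are assumptions chosen to match the 4096-byte case
discussed in the comments.

    #include <stdio.h>

    #define MAX_BUFFER_OFFSET 4096u

    static unsigned count_slots(unsigned head_len, const unsigned *frags,
                                int nr_frags, int gso)
    {
        unsigned count = 1;           /* the head always occupies one slot */
        unsigned copy_off = head_len; /* head must fit within one buffer   */
        int i;

        if (gso)
            count++;                  /* extra slot for the GSO descriptor */

        for (i = 0; i < nr_frags; i++) {
            unsigned size = frags[i];

            while (size > 0) {
                unsigned bytes = size;

                /* same buffer-advance rule as netbk_gop_frag_copy() */
                if (copy_off == MAX_BUFFER_OFFSET ||
                    (copy_off + size > MAX_BUFFER_OFFSET &&
                     size <= MAX_BUFFER_OFFSET && copy_off)) {
                    count++;
                    copy_off = 0;
                }

                if (copy_off + bytes > MAX_BUFFER_OFFSET)
                    bytes = MAX_BUFFER_OFFSET - copy_off;

                copy_off += bytes;
                size -= bytes;
            }
        }
        return count;
    }

    int main(void)
    {
        unsigned frags[] = { 1500, 4096, 200 };

        /* head of 100 bytes, three frags, GSO in use: prints 4 */
        printf("slots needed: %u\n", count_slots(100, frags, 3, 1));
        return 0;
    }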
From 96069b28e612232fb739ef48d9c2c5178b19f562 Mon Sep 17 00:00:00 2001
From: Dongxiao Xu <dongxiao.xu@intel.com>
Date: Thu, 10 Jun 2010 19:03:15 +0800
Subject: [PATCH 103/197] xen: netback: Set allocated memory to zero from vmalloc.
This should fix the windows/linux pv driver issue.
Signed-off-by: Dongxiao Xu <dongxiao.xu@intel.com>
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
drivers/xen/netback/netback.c | 1 +
1 files changed, 1 insertions(+), 0 deletions(-)
diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
index d53d88e..c7024d4 100644
--- a/drivers/xen/netback/netback.c
+++ b/drivers/xen/netback/netback.c
@@ -1724,6 +1724,7 @@ static int __init netback_init(void)
printk(KERN_ALERT "%s: out of memory\n", __func__);
return -ENOMEM;
}
+ memset(xen_netbk, 0, sizeof(struct xen_netbk) * xen_netbk_group_nr);
/* We can increase reservation by this much in net_rx_action(). */
// balloon_update_driver_allowance(NET_RX_RING_SIZE);
--
1.7.4
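
For kernels that have it, vzalloc() gives the same zeroed allocation in one
step. A minimal sketch, assuming vzalloc() from <linux/vmalloc.h> is available
in the target kernel (it appeared around 2.6.37), of what netback_init() could
do instead:

    /*
     * Sketch only: allocation and memset collapse into a single call.
     */
    xen_netbk = vzalloc(sizeof(struct xen_netbk) * xen_netbk_group_nr);
    if (!xen_netbk) {
        printk(KERN_ALERT "%s: out of memory\n", __func__);
        return -ENOMEM;
    }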
From 109a748d1c11b7eeaaacedb08c48bc65640b0bb8 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Date: Mon, 14 Jun 2010 13:23:33 +0100
Subject: [PATCH 104/197] xen: netback: minor code formatting fixup
Don't include redundant casts from allocation.
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
drivers/xen/netback/netback.c | 3 +--
1 files changed, 1 insertions(+), 2 deletions(-)
diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
index c7024d4..58e920a 100644
--- a/drivers/xen/netback/netback.c
+++ b/drivers/xen/netback/netback.c
@@ -1718,8 +1718,7 @@ static int __init netback_init(void)
return -ENODEV;
xen_netbk_group_nr = num_online_cpus();
- xen_netbk = (struct xen_netbk *)vmalloc(sizeof(struct xen_netbk) *
- xen_netbk_group_nr);
+ xen_netbk = vmalloc(sizeof(struct xen_netbk) * xen_netbk_group_nr);
if (!xen_netbk) {
printk(KERN_ALERT "%s: out of memory\n", __func__);
return -ENOMEM;
--
1.7.4
From 2424b59d68ee6ccdb7e52ab68bdba3a8b742513d Mon Sep 17 00:00:00 2001
From: Ian Campbell <ian.campbell@citrix.com>
Date: Wed, 30 Jun 2010 10:12:49 +0100
Subject: [PATCH 105/197] xen: netback: drop more relics of flipping mode
The mmu_update and gnttab_transfer arrays were only used by flipping
mode. With those gone, the multicall now consists of a single call to
GNTTABOP_copy, so drop the multicall as well and just make the one
hypercall.
Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
Cc: Jeremy Fitzhardinge <jeremy@goop.org>
Cc: Paul Durrant <paul.durrant@citrix.com>
---
drivers/xen/netback/common.h | 3 --
drivers/xen/netback/netback.c | 55 +++--------------------------------------
2 files changed, 4 insertions(+), 54 deletions(-)
diff --git a/drivers/xen/netback/common.h b/drivers/xen/netback/common.h
index 9c0c048..08e7a0e 100644
--- a/drivers/xen/netback/common.h
+++ b/drivers/xen/netback/common.h
@@ -304,9 +304,6 @@ struct xen_netbk {
u16 pending_ring[MAX_PENDING_REQS];
u16 dealloc_ring[MAX_PENDING_REQS];
- struct multicall_entry rx_mcl[NET_RX_RING_SIZE+3];
- struct mmu_update rx_mmu[NET_RX_RING_SIZE];
- struct gnttab_transfer grant_trans_op[NET_RX_RING_SIZE];
/*
* Each head or fragment can be up to 4096 bytes. Given
* MAX_BUFFER_OFFSET of 4096 the worst case is that each
diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
index 58e920a..ca65840 100644
--- a/drivers/xen/netback/netback.c
+++ b/drivers/xen/netback/netback.c
@@ -368,15 +368,9 @@ int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev)
}
struct netrx_pending_operations {
- unsigned trans_prod, trans_cons;
- unsigned mmu_prod, mmu_mcl;
- unsigned mcl_prod, mcl_cons;
unsigned copy_prod, copy_cons;
unsigned meta_prod, meta_cons;
- struct mmu_update *mmu;
- struct gnttab_transfer *trans;
struct gnttab_copy *copy;
- struct multicall_entry *mcl;
struct netbk_rx_meta *meta;
int copy_off;
grant_ref_t copy_gref;
@@ -577,7 +571,6 @@ static void net_rx_action(unsigned long data)
s8 status;
u16 irq, flags;
struct xen_netif_rx_response *resp;
- struct multicall_entry *mcl;
struct sk_buff_head rxq;
struct sk_buff *skb;
int notify_nr = 0;
@@ -588,10 +581,7 @@ static void net_rx_action(unsigned long data)
struct skb_cb_overlay *sco;
struct netrx_pending_operations npo = {
- .mmu = netbk->rx_mmu,
- .trans = netbk->grant_trans_op,
.copy = netbk->grant_copy_op,
- .mcl = netbk->rx_mcl,
.meta = netbk->meta,
};
@@ -617,50 +607,13 @@ static void net_rx_action(unsigned long data)
BUG_ON(npo.meta_prod > ARRAY_SIZE(netbk->meta));
- npo.mmu_mcl = npo.mcl_prod;
- if (npo.mcl_prod) {
- BUG_ON(xen_feature(XENFEAT_auto_translated_physmap));
- BUG_ON(npo.mmu_prod > ARRAY_SIZE(netbk->rx_mmu));
- mcl = npo.mcl + npo.mcl_prod++;
-
- BUG_ON(mcl[-1].op != __HYPERVISOR_update_va_mapping);
- mcl[-1].args[MULTI_UVMFLAGS_INDEX] = UVMF_TLB_FLUSH|UVMF_ALL;
-
- mcl->op = __HYPERVISOR_mmu_update;
- mcl->args[0] = (unsigned long)netbk->rx_mmu;
- mcl->args[1] = npo.mmu_prod;
- mcl->args[2] = 0;
- mcl->args[3] = DOMID_SELF;
- }
-
- if (npo.trans_prod) {
- BUG_ON(npo.trans_prod > ARRAY_SIZE(netbk->grant_trans_op));
- mcl = npo.mcl + npo.mcl_prod++;
- mcl->op = __HYPERVISOR_grant_table_op;
- mcl->args[0] = GNTTABOP_transfer;
- mcl->args[1] = (unsigned long)netbk->grant_trans_op;
- mcl->args[2] = npo.trans_prod;
- }
-
- if (npo.copy_prod) {
- BUG_ON(npo.copy_prod > ARRAY_SIZE(netbk->grant_copy_op));
- mcl = npo.mcl + npo.mcl_prod++;
- mcl->op = __HYPERVISOR_grant_table_op;
- mcl->args[0] = GNTTABOP_copy;
- mcl->args[1] = (unsigned long)netbk->grant_copy_op;
- mcl->args[2] = npo.copy_prod;
- }
-
- /* Nothing to do? */
- if (!npo.mcl_prod)
+ if (!npo.copy_prod)
return;
- BUG_ON(npo.mcl_prod > ARRAY_SIZE(netbk->rx_mcl));
-
- ret = HYPERVISOR_multicall(npo.mcl, npo.mcl_prod);
+ BUG_ON(npo.copy_prod > ARRAY_SIZE(netbk->grant_copy_op));
+ ret = HYPERVISOR_grant_table_op(GNTTABOP_copy, &netbk->grant_copy_op,
+ npo.copy_prod);
BUG_ON(ret != 0);
- /* The mmu_machphys_update() must not fail. */
- BUG_ON(npo.mmu_mcl && npo.mcl[npo.mmu_mcl].result != 0);
while ((skb = __skb_dequeue(&rxq)) != NULL) {
sco = (struct skb_cb_overlay *)skb->cb;
--
1.7.4
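
For reference, a minimal sketch of how one backend-to-frontend copy descriptor
in grant_copy_op[] is assembled before the single GNTTABOP_copy hypercall is
issued. The helper name and the gref/mfn parameters are placeholders, not code
from this series.

    #include <xen/interface/xen.h>
    #include <xen/interface/grant_table.h>

    /* hypothetical helper: one copy op for a local page -> guest gref copy */
    static void fill_copy_op(struct gnttab_copy *op, unsigned long src_gmfn,
                             uint16_t src_offset, domid_t dst_domid,
                             grant_ref_t dst_gref, uint16_t dst_offset,
                             uint16_t len)
    {
        op->flags         = GNTCOPY_dest_gref;  /* destination named by grant ref */
        op->source.domid  = DOMID_SELF;
        op->source.u.gmfn = src_gmfn;           /* local frame holding skb data   */
        op->source.offset = src_offset;
        op->dest.domid    = dst_domid;
        op->dest.u.ref    = dst_gref;           /* frontend's rx request gref     */
        op->dest.offset   = dst_offset;
        op->len           = len;
    }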
From 673a19d9e2d78939c6dc9c49e7e35ee54b54c8c7 Mon Sep 17 00:00:00 2001
From: Paul Durrant <paul.durrant@citrix.com>
Date: Fri, 2 Jul 2010 10:28:11 +0100
Subject: [PATCH 106/197] xen: netback: Fix basic indentation issue
Signed-off-by: Paul Durrant <paul.durrant@citrix.com>
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
drivers/xen/netback/netback.c | 11 +++++++----
1 files changed, 7 insertions(+), 4 deletions(-)
diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
index ca65840..848503e 100644
--- a/drivers/xen/netback/netback.c
+++ b/drivers/xen/netback/netback.c
@@ -445,10 +445,13 @@ static void netbk_gop_frag_copy(struct xen_netif *netif,
copy_gop = npo->copy + npo->copy_prod++;
copy_gop->flags = GNTCOPY_dest_gref;
if (PageForeign(page)) {
- struct xen_netbk *netbk = &xen_netbk[group];
- struct pending_tx_info *src_pend = &netbk->pending_tx_info[idx];
- copy_gop->source.domid = src_pend->netif->domid;
- copy_gop->source.u.ref = src_pend->req.gref;
+ struct xen_netbk *netbk = &xen_netbk[group];
+ struct pending_tx_info *src_pend;
+
+ src_pend = &netbk->pending_tx_info[idx];
+
+ copy_gop->source.domid = src_pend->netif->domid;
+ copy_gop->source.u.ref = src_pend->req.gref;
copy_gop->flags |= GNTCOPY_source_gref;
} else {
copy_gop->source.domid = DOMID_SELF;
--
1.7.4
From d08b2d1f2ff4723b335d0fb5b91ffd6cb6a005d3 Mon Sep 17 00:00:00 2001
From: Paul Durrant <paul.durrant@citrix.com>
Date: Mon, 5 Jul 2010 11:45:29 +0100
Subject: [PATCH 107/197] xen: netback: Add a new style of passing GSO packets to frontends.
When feature-gso-tcpv4-prefix is in use, the packet data passed to
the frontend is preceded by a ring entry that contains the necessary
GSO metadata. This style of GSO passing is required by the Citrix
Windows PV drivers.
Signed-off-by: Paul Durrant <paul.durrant@citrix.com>
Cc: Ian Campbell <ian.campbell@citrix.com>
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
drivers/xen/netback/common.h | 3 ++-
drivers/xen/netback/netback.c | 37 ++++++++++++++++++++++++++++++++++---
drivers/xen/netback/xenbus.c | 15 ++++++++++++---
include/xen/interface/io/netif.h | 4 ++++
4 files changed, 52 insertions(+), 7 deletions(-)
diff --git a/drivers/xen/netback/common.h b/drivers/xen/netback/common.h
index 08e7a0e..78451ab 100644
--- a/drivers/xen/netback/common.h
+++ b/drivers/xen/netback/common.h
@@ -80,7 +80,8 @@ struct xen_netif {
int features;
/* Internal feature information. */
- u8 can_queue:1; /* can queue packets for receiver? */
+ u8 can_queue:1; /* can queue packets for receiver? */
+ u8 gso_prefix:1; /* use a prefix segment for GSO information */
/* Allow netif_be_start_xmit() to peek ahead in the rx request
* ring. This is a prediction of what rx_req_cons will be once
diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
index 848503e..e93a62e 100644
--- a/drivers/xen/netback/netback.c
+++ b/drivers/xen/netback/netback.c
@@ -432,6 +432,7 @@ static void netbk_gop_frag_copy(struct xen_netif *netif,
/* Overflowed this request, go to the next one */
req = RING_GET_REQUEST(&netif->rx, netif->rx.req_cons++);
meta = npo->meta + npo->meta_prod++;
+ meta->gso_size = 0;
meta->size = 0;
meta->id = req->id;
npo->copy_off = 0;
@@ -492,9 +493,23 @@ static int netbk_gop_skb(struct sk_buff *skb,
old_meta_prod = npo->meta_prod;
+ /* Set up a GSO prefix descriptor, if necessary */
+ if (skb_shinfo(skb)->gso_size && netif->gso_prefix) {
+ req = RING_GET_REQUEST(&netif->rx, netif->rx.req_cons++);
+ meta = npo->meta + npo->meta_prod++;
+ meta->gso_size = skb_shinfo(skb)->gso_size;
+ meta->size = 0;
+ meta->id = req->id;
+ }
+
req = RING_GET_REQUEST(&netif->rx, netif->rx.req_cons++);
meta = npo->meta + npo->meta_prod++;
- meta->gso_size = skb_shinfo(skb)->gso_size;
+
+ if (!netif->gso_prefix)
+ meta->gso_size = skb_shinfo(skb)->gso_size;
+ else
+ meta->gso_size = 0;
+
meta->size = 0;
meta->id = req->id;
npo->copy_off = 0;
@@ -506,7 +521,7 @@ static int netbk_gop_skb(struct sk_buff *skb,
offset_in_page(skb->data), 1);
/* Leave a gap for the GSO descriptor. */
- if (skb_shinfo(skb)->gso_size)
+ if (skb_shinfo(skb)->gso_size && !netif->gso_prefix)
netif->rx.req_cons++;
for (i = 0; i < nr_frags; i++) {
@@ -623,6 +638,21 @@ static void net_rx_action(unsigned long data)
netif = netdev_priv(skb->dev);
+ if (netbk->meta[npo.meta_cons].gso_size && netif->gso_prefix) {
+ resp = RING_GET_RESPONSE(&netif->rx,
+ netif->rx.rsp_prod_pvt++);
+
+ resp->flags = NETRXF_gso_prefix | NETRXF_more_data;
+
+ resp->offset = netbk->meta[npo.meta_cons].gso_size;
+ resp->id = netbk->meta[npo.meta_cons].id;
+ resp->status = sco->meta_slots_used;
+
+ npo.meta_cons++;
+ sco->meta_slots_used--;
+ }
+
+
netif->stats.tx_bytes += skb->len;
netif->stats.tx_packets++;
@@ -633,6 +663,7 @@ static void net_rx_action(unsigned long data)
flags = 0;
else
flags = NETRXF_more_data;
+
if (skb->ip_summed == CHECKSUM_PARTIAL) /* local packet? */
flags |= NETRXF_csum_blank | NETRXF_data_validated;
else if (skb->ip_summed == CHECKSUM_UNNECESSARY)
@@ -645,7 +676,7 @@ static void net_rx_action(unsigned long data)
netbk->meta[npo.meta_cons].size,
flags);
- if (netbk->meta[npo.meta_cons].gso_size) {
+ if (netbk->meta[npo.meta_cons].gso_size && !netif->gso_prefix) {
struct xen_netif_extra_info *gso =
(struct xen_netif_extra_info *)
RING_GET_RESPONSE(&netif->rx,
diff --git a/drivers/xen/netback/xenbus.c b/drivers/xen/netback/xenbus.c
index e30b0c7..cda987f 100644
--- a/drivers/xen/netback/xenbus.c
+++ b/drivers/xen/netback/xenbus.c
@@ -457,16 +457,25 @@ static int connect_rings(struct backend_info *be)
be->netif->dev->mtu = ETH_DATA_LEN;
}
- if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-gso-tcpv4", "%d",
- &val) < 0)
+ if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-gso-tcpv4",
+ "%d", &val) < 0)
val = 0;
if (val) {
be->netif->features |= NETIF_F_TSO;
be->netif->dev->features |= NETIF_F_TSO;
}
+ if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-gso-tcpv4-prefix",
+ "%d", &val) < 0)
+ val = 0;
+ if (val) {
+ be->netif->features |= NETIF_F_TSO;
+ be->netif->dev->features |= NETIF_F_TSO;
+ be->netif->gso_prefix = 1;
+ }
+
if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-no-csum-offload",
- "%d", &val) < 0)
+ "%d", &val) < 0)
val = 0;
if (val) {
be->netif->features &= ~NETIF_F_IP_CSUM;
diff --git a/include/xen/interface/io/netif.h b/include/xen/interface/io/netif.h
index 518481c..8309344 100644
--- a/include/xen/interface/io/netif.h
+++ b/include/xen/interface/io/netif.h
@@ -131,6 +131,10 @@ struct xen_netif_rx_request {
#define _NETRXF_extra_info (3)
#define NETRXF_extra_info (1U<<_NETRXF_extra_info)
+/* GSO Prefix descriptor. */
+#define _NETRXF_gso_prefix (4)
+#define NETRXF_gso_prefix (1U<<_NETRXF_gso_prefix)
+
struct xen_netif_rx_response {
uint16_t id;
uint16_t offset; /* Offset in page of start of received packet */
--
1.7.4
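
A sketch of how a frontend consuming the new descriptor might decode it, based
on the layout used in the hunk above (the offset field carries the GSO size,
status the number of data slots that follow). The function and out-parameter
names are hypothetical.

    #include <xen/interface/io/netif.h>

    /* returns 1 if the response was a prefix GSO descriptor (no packet data) */
    static int rx_is_gso_prefix(const struct xen_netif_rx_response *rsp,
                                unsigned int *gso_size, unsigned int *data_slots)
    {
        if (!(rsp->flags & NETRXF_gso_prefix))
            return 0;

        *gso_size   = rsp->offset;  /* MSS of the packet that follows      */
        *data_slots = rsp->status;  /* number of slots holding packet data */
        return 1;                   /* NETRXF_more_data is also set above  */
    }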
From bd910979612331d60a629c16a49ebeb5efa0f035 Mon Sep 17 00:00:00 2001
From: Paul Durrant <paul.durrant@citrix.com>
Date: Fri, 2 Jul 2010 10:28:13 +0100
Subject: [PATCH 108/197] xen: netback: Make frontend features distinct from netback feature flags.
Make sure that if a feature flag is disabled by ethtool on netback
we do not gratuitously re-enable it when we check the frontend
features during ring connection.
Signed-off-by: Paul Durrant <paul.durrant@citrix.com>
Cc: Ian Campbell <ian.campbell@citrix.com>
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
drivers/xen/netback/common.h | 14 ++++++--
drivers/xen/netback/interface.c | 68 ++++++++++++++++++++++++++++++--------
drivers/xen/netback/netback.c | 2 +-
drivers/xen/netback/xenbus.c | 44 ++++++++++---------------
4 files changed, 81 insertions(+), 47 deletions(-)
diff --git a/drivers/xen/netback/common.h b/drivers/xen/netback/common.h
index 78451ab..a5f3759 100644
--- a/drivers/xen/netback/common.h
+++ b/drivers/xen/netback/common.h
@@ -76,12 +76,17 @@ struct xen_netif {
struct vm_struct *tx_comms_area;
struct vm_struct *rx_comms_area;
- /* Set of features that can be turned on in dev->features. */
- int features;
+ /* Flags that must not be set in dev->features */
+ int features_disabled;
+
+ /* Frontend feature information. */
+ u8 can_sg:1;
+ u8 gso:1;
+ u8 gso_prefix:1;
+ u8 csum:1;
/* Internal feature information. */
u8 can_queue:1; /* can queue packets for receiver? */
- u8 gso_prefix:1; /* use a prefix segment for GSO information */
/* Allow netif_be_start_xmit() to peek ahead in the rx request
* ring. This is a prediction of what rx_req_cons will be once
@@ -187,6 +192,7 @@ void netif_accel_init(void);
void netif_disconnect(struct xen_netif *netif);
+void netif_set_features(struct xen_netif *netif);
struct xen_netif *netif_alloc(struct device *parent, domid_t domid, unsigned int handle);
int netif_map(struct xen_netif *netif, unsigned long tx_ring_ref,
unsigned long rx_ring_ref, unsigned int evtchn);
@@ -223,7 +229,7 @@ static inline int netbk_can_queue(struct net_device *dev)
static inline int netbk_can_sg(struct net_device *dev)
{
struct xen_netif *netif = netdev_priv(dev);
- return netif->features & NETIF_F_SG;
+ return netif->can_sg;
}
struct pending_tx_info {
diff --git a/drivers/xen/netback/interface.c b/drivers/xen/netback/interface.c
index 172ef4c..2e8508a 100644
--- a/drivers/xen/netback/interface.c
+++ b/drivers/xen/netback/interface.c
@@ -121,31 +121,69 @@ static int netbk_change_mtu(struct net_device *dev, int mtu)
return 0;
}
-static int netbk_set_sg(struct net_device *dev, u32 data)
+void netif_set_features(struct xen_netif *netif)
{
- if (data) {
- struct xen_netif *netif = netdev_priv(dev);
+ struct net_device *dev = netif->dev;
+ int features = dev->features;
+
+ if (netif->can_sg)
+ features |= NETIF_F_SG;
+ if (netif->gso || netif->gso_prefix)
+ features |= NETIF_F_TSO;
+ if (netif->csum)
+ features |= NETIF_F_IP_CSUM;
+
+ features &= ~(netif->features_disabled);
- if (!(netif->features & NETIF_F_SG))
+ if (!(features & NETIF_F_SG) && dev->mtu > ETH_DATA_LEN)
+ dev->mtu = ETH_DATA_LEN;
+
+ dev->features = features;
+}
+
+static int netbk_set_tx_csum(struct net_device *dev, u32 data)
+{
+ struct xen_netif *netif = netdev_priv(dev);
+ if (data) {
+ if (!netif->csum)
return -ENOSYS;
+ netif->features_disabled &= ~NETIF_F_IP_CSUM;
+ } else {
+ netif->features_disabled |= NETIF_F_IP_CSUM;
}
- if (dev->mtu > ETH_DATA_LEN)
- dev->mtu = ETH_DATA_LEN;
+ netif_set_features(netif);
+ return 0;
+}
- return ethtool_op_set_sg(dev, data);
+static int netbk_set_sg(struct net_device *dev, u32 data)
+{
+ struct xen_netif *netif = netdev_priv(dev);
+ if (data) {
+ if (!netif->can_sg)
+ return -ENOSYS;
+ netif->features_disabled &= ~NETIF_F_SG;
+ } else {
+ netif->features_disabled |= NETIF_F_SG;
+ }
+
+ netif_set_features(netif);
+ return 0;
}
static int netbk_set_tso(struct net_device *dev, u32 data)
{
+ struct xen_netif *netif = netdev_priv(dev);
if (data) {
- struct xen_netif *netif = netdev_priv(dev);
-
- if (!(netif->features & NETIF_F_TSO))
+ if (!netif->gso && !netif->gso_prefix)
return -ENOSYS;
+ netif->features_disabled &= ~NETIF_F_TSO;
+ } else {
+ netif->features_disabled |= NETIF_F_TSO;
}
- return ethtool_op_set_tso(dev, data);
+ netif_set_features(netif);
+ return 0;
}
static void netbk_get_drvinfo(struct net_device *dev,
@@ -200,7 +238,7 @@ static struct ethtool_ops network_ethtool_ops =
.get_drvinfo = netbk_get_drvinfo,
.get_tx_csum = ethtool_op_get_tx_csum,
- .set_tx_csum = ethtool_op_set_tx_csum,
+ .set_tx_csum = netbk_set_tx_csum,
.get_sg = ethtool_op_get_sg,
.set_sg = netbk_set_sg,
.get_tso = ethtool_op_get_tso,
@@ -242,7 +280,8 @@ struct xen_netif *netif_alloc(struct device *parent, domid_t domid, unsigned int
netif->domid = domid;
netif->group = -1;
netif->handle = handle;
- netif->features = NETIF_F_SG;
+ netif->can_sg = 1;
+ netif->csum = 1;
atomic_set(&netif->refcnt, 1);
init_waitqueue_head(&netif->waiting_to_free);
netif->dev = dev;
@@ -259,8 +298,7 @@ struct xen_netif *netif_alloc(struct device *parent, domid_t domid, unsigned int
init_timer(&netif->tx_queue_timeout);
dev->netdev_ops = &netback_ops;
- dev->features = NETIF_F_IP_CSUM|NETIF_F_SG;
-
+ netif_set_features(netif);
SET_ETHTOOL_OPS(dev, &network_ethtool_ops);
dev->tx_queue_len = netbk_queue_length;
diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
index e93a62e..63a771e 100644
--- a/drivers/xen/netback/netback.c
+++ b/drivers/xen/netback/netback.c
@@ -238,7 +238,7 @@ static struct sk_buff *netbk_copy_skb(struct sk_buff *skb)
static inline int netbk_max_required_rx_slots(struct xen_netif *netif)
{
- if (netif->features & (NETIF_F_SG|NETIF_F_TSO))
+ if (netif->can_sg || netif->gso || netif->gso_prefix)
return MAX_SKB_FRAGS + 2; /* header + extra_info + frags */
return 1; /* all in one */
}
diff --git a/drivers/xen/netback/xenbus.c b/drivers/xen/netback/xenbus.c
index cda987f..17ff5cf 100644
--- a/drivers/xen/netback/xenbus.c
+++ b/drivers/xen/netback/xenbus.c
@@ -404,6 +404,7 @@ static void connect(struct backend_info *be)
static int connect_rings(struct backend_info *be)
{
+ struct xen_netif *netif = be->netif;
struct xenbus_device *dev = be->dev;
unsigned long tx_ring_ref, rx_ring_ref;
unsigned int evtchn, rx_copy;
@@ -437,53 +438,42 @@ static int connect_rings(struct backend_info *be)
if (!rx_copy)
return -EOPNOTSUPP;
- if (be->netif->dev->tx_queue_len != 0) {
+ if (netif->dev->tx_queue_len != 0) {
if (xenbus_scanf(XBT_NIL, dev->otherend,
"feature-rx-notify", "%d", &val) < 0)
val = 0;
if (val)
- be->netif->can_queue = 1;
+ netif->can_queue = 1;
else
/* Must be non-zero for pfifo_fast to work. */
- be->netif->dev->tx_queue_len = 1;
+ netif->dev->tx_queue_len = 1;
}
- if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-sg", "%d", &val) < 0)
+ if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-sg",
+ "%d", &val) < 0)
val = 0;
- if (!val) {
- be->netif->features &= ~NETIF_F_SG;
- be->netif->dev->features &= ~NETIF_F_SG;
- if (be->netif->dev->mtu > ETH_DATA_LEN)
- be->netif->dev->mtu = ETH_DATA_LEN;
- }
+ netif->can_sg = !!val;
if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-gso-tcpv4",
- "%d", &val) < 0)
+ "%d", &val) < 0)
val = 0;
- if (val) {
- be->netif->features |= NETIF_F_TSO;
- be->netif->dev->features |= NETIF_F_TSO;
- }
+ netif->gso = !!val;
if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-gso-tcpv4-prefix",
- "%d", &val) < 0)
+ "%d", &val) < 0)
val = 0;
- if (val) {
- be->netif->features |= NETIF_F_TSO;
- be->netif->dev->features |= NETIF_F_TSO;
- be->netif->gso_prefix = 1;
- }
+ netif->gso_prefix = !!val;
if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-no-csum-offload",
- "%d", &val) < 0)
+ "%d", &val) < 0)
val = 0;
- if (val) {
- be->netif->features &= ~NETIF_F_IP_CSUM;
- be->netif->dev->features &= ~NETIF_F_IP_CSUM;
- }
+ netif->csum = !val;
+
+ /* Set dev->features */
+ netif_set_features(netif);
/* Map the shared frame, irq etc. */
- err = netif_map(be->netif, tx_ring_ref, rx_ring_ref, evtchn);
+ err = netif_map(netif, tx_ring_ref, rx_ring_ref, evtchn);
if (err) {
xenbus_dev_fatal(dev, err,
"mapping shared-frames %lu/%lu port %u",
--
1.7.4
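
For context, the keys read by connect_rings() above are written by the
frontend during its own connect phase. A minimal sketch of that side, with a
hypothetical helper name, assuming the usual xenbus_printf() interface:

    #include <xen/xenbus.h>

    /* hypothetical frontend helper: advertise what this frontend accepts */
    static int advertise_features(struct xenbus_transaction xbt,
                                  struct xenbus_device *dev)
    {
        int err;

        err = xenbus_printf(xbt, dev->nodename, "feature-sg", "%d", 1);
        if (err)
            return err;

        err = xenbus_printf(xbt, dev->nodename, "feature-gso-tcpv4", "%d", 1);
        if (err)
            return err;

        /* opt in to the prefix style of GSO metadata instead of extra_info */
        return xenbus_printf(xbt, dev->nodename,
                             "feature-gso-tcpv4-prefix", "%d", 1);
    }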
From cf8c20169427de5829e3ec723712b77de52e64ac Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Date: Thu, 15 Jul 2010 10:46:50 -0700
Subject: [PATCH 109/197] xen: netback: only initialize for PV domains
HVM domains don't support netback
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
drivers/xen/netback/netback.c | 2 +-
1 files changed, 1 insertions(+), 1 deletions(-)
diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
index 63a771e..911c85b 100644
--- a/drivers/xen/netback/netback.c
+++ b/drivers/xen/netback/netback.c
@@ -1701,7 +1701,7 @@ static int __init netback_init(void)
int rc = 0;
int group;
- if (!xen_domain())
+ if (!xen_pv_domain())
return -ENODEV;
xen_netbk_group_nr = num_online_cpus();
--
1.7.4
From 00a5c7eddb919701ac998b33bf4f283efaa06bbc Mon Sep 17 00:00:00 2001
From: Ian Campbell <ian.campbell@citrix.com>
Date: Wed, 21 Jul 2010 13:24:26 -0700
Subject: [PATCH 110/197] xen/rings: make protocol specific usage of shared sring explicit
I don't think protocol-specific data really belongs in this header,
but since it is already there and we seem to be stuck with it, let's at
least make the users explicit lest people get caught out by future new
fields moving the pad field around.
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
(cherry picked from commit df0afd34ec3015e44b8121d0e542d32fb04d438d)
---
include/xen/interface/io/ring.h | 8 +++++++-
1 files changed, 7 insertions(+), 1 deletions(-)
diff --git a/include/xen/interface/io/ring.h b/include/xen/interface/io/ring.h
index e8cbf43..e2d62cf 100644
--- a/include/xen/interface/io/ring.h
+++ b/include/xen/interface/io/ring.h
@@ -73,7 +73,13 @@ union __name##_sring_entry { \
struct __name##_sring { \
RING_IDX req_prod, req_event; \
RING_IDX rsp_prod, rsp_event; \
- uint8_t pad[48]; \
+ union { \
+ struct { \
+ uint8_t msg; \
+ } tapif_user; \
+ uint8_t pvt_pad[4]; \
+ } private; \
+ uint8_t pad[44]; \
union __name##_sring_entry ring[1]; /* variable-length */ \
}; \
\
--
1.7.4
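
The arithmetic behind the change: four RING_IDX counters (16 bytes) plus the
new 4-byte private union plus 44 bytes of padding keeps the ring entries
starting 64 bytes into the shared page, exactly as with the old 48-byte pad.
A small compile-time check, purely illustrative, assuming the netif ring types
generated from these headers:

    #include <linux/kernel.h>
    #include <linux/stddef.h>
    #include <xen/interface/io/ring.h>
    #include <xen/interface/io/netif.h>

    static inline void check_sring_layout(void)
    {
        /* 16 (counters) + 4 (private union) + 44 (pad) == 64 */
        BUILD_BUG_ON(offsetof(struct xen_netif_rx_sring, ring) != 64);
    }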
From 1866aec6117132b4399f9e956994af259ad5cfdb Mon Sep 17 00:00:00 2001
From: Bastian Blank <waldi@debian.org>
Date: Thu, 29 Jul 2010 17:30:18 +0200
Subject: [PATCH 111/197] xen: netback: Fix null-pointer access in netback_uevent
The uevent method of Xen netback does not check whether the network
device is already set up and dereferences a null pointer if it is not.
Signed-off-by: Bastian Blank <waldi@debian.org>
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
drivers/xen/netback/xenbus.c | 10 ++--------
1 files changed, 2 insertions(+), 8 deletions(-)
diff --git a/drivers/xen/netback/xenbus.c b/drivers/xen/netback/xenbus.c
index 17ff5cf..1fec65a 100644
--- a/drivers/xen/netback/xenbus.c
+++ b/drivers/xen/netback/xenbus.c
@@ -154,17 +154,11 @@ fail:
*/
static int netback_uevent(struct xenbus_device *xdev, struct kobj_uevent_env *env)
{
- struct backend_info *be;
- struct xen_netif *netif;
+ struct backend_info *be = dev_get_drvdata(&xdev->dev);
char *val;
DPRINTK("netback_uevent");
- be = dev_get_drvdata(&xdev->dev);
- if (!be)
- return 0;
- netif = be->netif;
-
val = xenbus_read(XBT_NIL, xdev->nodename, "script", NULL);
if (IS_ERR(val)) {
int err = PTR_ERR(val);
@@ -179,7 +173,7 @@ static int netback_uevent(struct xenbus_device *xdev, struct kobj_uevent_env *en
kfree(val);
}
- if (add_uevent_var(env, "vif=%s", netif->dev->name))
+ if (be && be->netif && add_uevent_var(env, "vif=%s", be->netif->dev->name))
return -ENOMEM;
return 0;
--
1.7.4
From 7f1732b25d00393131220a0369caa8a28faf46e1 Mon Sep 17 00:00:00 2001
From: Ian Campbell <ian.campbell@citrix.com>
Date: Fri, 30 Jul 2010 15:16:47 +0100
Subject: [PATCH 112/197] xen: netback: check if foreign pages are actually netback-created foreign pages.
020ba906 "xen/netback: Multiple tasklets support." changed
netbk_gop_frag_copy to attempt to look up a pending_tx_info for any
foreign page, regardless of whether the page was a netback-foreign
page.
In the case of non-netback pages this can lead to dereferencing a NULL
src_pend->netif.
Restore the behaviour of netif_page_index prior to a3031942
"xen/netback: Introduce a new struct type page_ext" by performing
tests to ensure that page is a netback page and extend the same checks
to netif_page_group.
Actually combine netif_page_{index,group} into a single function
since they are always called together and it saves duplicating all the
checks.
Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
Cc: Jeremy Fitzhardinge <jeremy@goop.org>
Cc: Xu, Dongxiao <dongxiao.xu@intel.com>
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
drivers/xen/netback/netback.c | 56 ++++++++++++++++++++++++++++------------
1 files changed, 39 insertions(+), 17 deletions(-)
diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
index 911c85b..95df223 100644
--- a/drivers/xen/netback/netback.c
+++ b/drivers/xen/netback/netback.c
@@ -89,18 +89,37 @@ static inline void netif_set_page_ext(struct page *pg, unsigned int group,
pg->mapping = ext.mapping;
}
-static inline unsigned int netif_page_group(const struct page *pg)
+static inline int netif_get_page_ext(struct page *pg, unsigned int *_group, unsigned int *_idx)
{
union page_ext ext = { .mapping = pg->mapping };
+ struct xen_netbk *netbk;
+ unsigned int group, idx;
- return ext.e.group - 1;
-}
+ if (!PageForeign(pg))
+ return 0;
-static inline unsigned int netif_page_index(const struct page *pg)
-{
- union page_ext ext = { .mapping = pg->mapping };
+ group = ext.e.group - 1;
+
+ if (group < 0 || group >= xen_netbk_group_nr)
+ return 0;
+
+ netbk = &xen_netbk[group];
+
+ if (netbk->mmap_pages == NULL)
+ return 0;
- return ext.e.idx;
+ idx = ext.e.idx;
+
+ if ((idx < 0) || (idx >= MAX_PENDING_REQS))
+ return 0;
+
+ if (netbk->mmap_pages[idx] != pg)
+ return 0;
+
+ *_group = group;
+ *_idx = idx;
+
+ return 1;
}
/*
@@ -386,8 +405,12 @@ static void netbk_gop_frag_copy(struct xen_netif *netif,
{
struct gnttab_copy *copy_gop;
struct netbk_rx_meta *meta;
- int group = netif_page_group(page);
- int idx = netif_page_index(page);
+ /*
+ * These variables a used iff netif_get_page_ext returns true,
+ * in which case they are guaranteed to be initialized.
+ */
+ unsigned int uninitialized_var(group), uninitialized_var(idx);
+ int foreign = netif_get_page_ext(page, &group, &idx);
unsigned long bytes;
/* Data must not cross a page boundary. */
@@ -445,7 +468,7 @@ static void netbk_gop_frag_copy(struct xen_netif *netif,
copy_gop = npo->copy + npo->copy_prod++;
copy_gop->flags = GNTCOPY_dest_gref;
- if (PageForeign(page)) {
+ if (foreign) {
struct xen_netbk *netbk = &xen_netbk[group];
struct pending_tx_info *src_pend;
@@ -1535,14 +1558,13 @@ static void netif_idx_release(struct xen_netbk *netbk, u16 pending_idx)
static void netif_page_release(struct page *page, unsigned int order)
{
- int group = netif_page_group(page);
- int idx = netif_page_index(page);
- struct xen_netbk *netbk = &xen_netbk[group];
+ unsigned int group, idx;
+ int foreign = netif_get_page_ext(page, &group, &idx);
+
+ BUG_ON(!foreign);
BUG_ON(order);
- BUG_ON(group < 0 || group >= xen_netbk_group_nr);
- BUG_ON(idx < 0 || idx >= MAX_PENDING_REQS);
- BUG_ON(netbk->mmap_pages[idx] != page);
- netif_idx_release(netbk, idx);
+
+ netif_idx_release(&xen_netbk[group], idx);
}
irqreturn_t netif_be_int(int irq, void *dev_id)
--
1.7.4
From 14a12990d12cd9ee919d5579c1d0c3df74ad66e7 Mon Sep 17 00:00:00 2001
From: Ian Campbell <ian.campbell@citrix.com>
Date: Fri, 30 Jul 2010 15:16:46 +0100
Subject: [PATCH 113/197] xen: netback: do not unleash netback threads until initialisation is complete
Otherwise netbk_action_thread can reference &netbk->net_schedule_list
(via tx_work_todo) before it is initialised. Until now it was zeroed
which is probably safe but not exactly robust.
Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
Cc: Jeremy Fitzhardinge <jeremy@goop.org>
Cc: Xu, Dongxiao <dongxiao.xu@intel.com>
Cc: Paul Durrant <Paul.Durrant@citrix.com>
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
drivers/xen/netback/netback.c | 4 +++-
1 files changed, 3 insertions(+), 1 deletions(-)
diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
index 95df223..2646383 100644
--- a/drivers/xen/netback/netback.c
+++ b/drivers/xen/netback/netback.c
@@ -1782,7 +1782,6 @@ static int __init netback_init(void)
if (!IS_ERR(netbk->kthread.task)) {
kthread_bind(netbk->kthread.task, group);
- wake_up_process(netbk->kthread.task);
} else {
printk(KERN_ALERT
"kthread_run() fails at netback\n");
@@ -1808,6 +1807,9 @@ static int __init netback_init(void)
spin_lock_init(&netbk->net_schedule_list_lock);
atomic_set(&netbk->netfront_count, 0);
+
+ if (MODPARM_netback_kthread)
+ wake_up_process(netbk->kthread.task);
}
netbk_copy_skb_mode = NETBK_DONT_COPY_SKB;
--
1.7.4
From 6decec42c631f2e2e268f00ce8841faf38817ca8 Mon Sep 17 00:00:00 2001
From: Ian Campbell <ijc@hellion.org.uk>
Date: Thu, 2 Sep 2010 14:36:40 +0100
Subject: [PATCH 115/197] xen: netback: save interrupt state in add_to_net_schedule_list_tail
add_to_net_schedule_list_tail is called from both hard interrupt context
(via netif_be_int) and soft interrupt/process context
(netif_schedule_work), so use the interrupt-state-saving spinlock
variants.
Fixes:
------------[ cut here ]------------
WARNING: at kernel/lockdep.c:2323 trace_hardirqs_on_caller+0xef/0x1a0()
Hardware name: PowerEdge 860
Modules linked in: rtc_cmos rtc_core rtc_lib
Pid: 16, comm: xenwatch Not tainted 2.6.32.18-x86_32p-xen0-00850-ge6b9b2c #98
Call Trace:
[<c103951c>] warn_slowpath_common+0x6c/0xc0
[<c1039585>] warn_slowpath_null+0x15/0x20
[<c105f60f>] trace_hardirqs_on_caller+0xef/0x1a0
[<c105f6cb>] trace_hardirqs_on+0xb/0x10
[<c136cc72>] _spin_unlock_irq+0x22/0x40
[<c11ab9ef>] add_to_net_schedule_list_tail+0x5f/0xb0
[<c11aba6b>] netif_be_int+0x2b/0x120
[<c106dd8e>] handle_IRQ_event+0x2e/0xe0
[<c106f98e>] handle_level_irq+0x6e/0xf0
[<c1197cdf>] __xen_evtchn_do_upcall+0x16f/0x190
[<c11981b8>] xen_evtchn_do_upcall+0x28/0x40
[<c100b487>] xen_do_upcall+0x7/0xc
[<c119bcf9>] xs_talkv+0x59/0x1a0
[<c119bf6a>] xs_single+0x3a/0x50
[<c119c6f9>] xenbus_read+0x39/0x60
[<c11adf77>] frontend_changed+0x3e7/0x6a0
[<c119d35a>] xenbus_otherend_changed+0x8a/0xa0
[<c119d572>] frontend_changed+0x12/0x20
[<c119b9dc>] xenwatch_thread+0x7c/0x140
[<c104ea74>] kthread+0x74/0x80
[<c100b433>] kernel_thread_helper+0x7/0x10
---[ end trace 48d73949a8e0909a ]---
Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
drivers/xen/netback/netback.c | 6 ++++--
1 files changed, 4 insertions(+), 2 deletions(-)
diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
index 2646383..1d080f6 100644
--- a/drivers/xen/netback/netback.c
+++ b/drivers/xen/netback/netback.c
@@ -787,17 +787,19 @@ static void remove_from_net_schedule_list(struct xen_netif *netif)
static void add_to_net_schedule_list_tail(struct xen_netif *netif)
{
+ unsigned long flags;
+
struct xen_netbk *netbk = &xen_netbk[netif->group];
if (__on_net_schedule_list(netif))
return;
- spin_lock_irq(&netbk->net_schedule_list_lock);
+ spin_lock_irqsave(&netbk->net_schedule_list_lock, flags);
if (!__on_net_schedule_list(netif) &&
likely(netif_schedulable(netif))) {
list_add_tail(&netif->list, &netbk->net_schedule_list);
netif_get(netif);
}
- spin_unlock_irq(&netbk->net_schedule_list_lock);
+ spin_unlock_irqrestore(&netbk->net_schedule_list_lock, flags);
}
void netif_schedule_work(struct xen_netif *netif)
--
1.7.4
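
The general rule the fix follows: a lock that can be taken from hard-irq
context must not unconditionally re-enable interrupts on unlock, so callers
that may already run with interrupts disabled use the state-saving variants.
A generic sketch, with placeholder names:

    #include <linux/spinlock.h>

    static DEFINE_SPINLOCK(example_lock); /* stand-in for net_schedule_list_lock */

    static void touch_shared_list(void)
    {
        unsigned long flags;

        spin_lock_irqsave(&example_lock, flags);
        /* ... add/remove list entries here ... */
        spin_unlock_irqrestore(&example_lock, flags);
        /* interrupt state is restored to whatever the caller had */
    }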
From 0e667d904c6ab6c44cedef51ef00964f9e0559ba Mon Sep 17 00:00:00 2001
From: Ian Campbell <ian.campbell@citrix.com>
Date: Fri, 8 Oct 2010 17:11:51 +0100
Subject: [PATCH 116/197] xen: netback: increase size of rx_meta array.
We can end up needing as many of these as we need grant copy operations, so
increase the array size to match (2*NET_RX_RING_SIZE).
Crash observed on XenServer:
kernel: ------------[ cut here ]------------
kernel: kernel BUG at drivers/xen/netback/netback.c:834!
kernel: invalid opcode: 0000 [#1] SMP
kernel: last sysfs file: /sys/devices/xen-backend/vbd-10-768/statistics/rd_usecs
kernel: Process netback (pid: 1413, ti=ec8a4000 task=ed0a6b70 task.ti=ec8a4000)
kernel: Stack: 00000000 00000612 00000001 00000000 00020000 00000000 ecfbe000 00000000
kernel: ec8a5f80 ec8a5f98 ec8a5fac 00000000 c0537220 c0539220 00000000 c0534220
kernel: cd7afaa0 cd7afaa0 0000000c 00000014 062de396 00000001 00000001 00000014
kernel: Call Trace:
kernel: [<c0285f10>] ? netbk_action_thread+0x0/0x1fe0
kernel: [<c013daf2>] ? kthread+0x42/0x70
kernel: [<c013dab0>] ? kthread+0x0/0x70
kernel: [<c010569b>] ? kernel_thread_helper+0x7/0x10
kernel: =======================
kernel: Code: 00 00 c7 42 08 20 82 53 c0 8b 85 e4 fe ff ff c7 42 10 00 00 00 00 \
c7 42 14 f0 7f 00 00 89 42 0c 8b 8d ec fe ff ff e9 3e e9 ff ff <0f> \
0b eb fe 0f 0b eb fe 0f 0b eb fe 0f 0b eb fe 31 c0 e8 bf 31
kernel: EIP: [<c028790a>] netbk_action_thread+0x19fa/0x1fe0 SS:ESP 0069:ec8a5d98
Corresponding to
BUG_ON(npo.meta_prod > ARRAY_SIZE(netbk->meta));
Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
drivers/xen/netback/common.h | 2 +-
1 files changed, 1 insertions(+), 1 deletions(-)
diff --git a/drivers/xen/netback/common.h b/drivers/xen/netback/common.h
index a5f3759..ce0041a 100644
--- a/drivers/xen/netback/common.h
+++ b/drivers/xen/netback/common.h
@@ -319,7 +319,7 @@ struct xen_netbk {
struct gnttab_copy grant_copy_op[2*NET_RX_RING_SIZE];
unsigned char rx_notify[NR_IRQS];
u16 notify_list[NET_RX_RING_SIZE];
- struct netbk_rx_meta meta[NET_RX_RING_SIZE];
+ struct netbk_rx_meta meta[2*NET_RX_RING_SIZE];
};
extern struct xen_netbk *xen_netbk;
--
1.7.4
From 36713152990836043c908777654ea01ed13ccdf4 Mon Sep 17 00:00:00 2001
From: Ian Campbell <ian.campbell@citrix.com>
Date: Fri, 15 Oct 2010 13:41:44 +0100
Subject: [PATCH 117/197] xen: netback: take net_schedule_list_lock when removing entry from net_schedule_list
There is a race in net_tx_build_mops between checking if
net_schedule_list is empty and actually dequeuing the first entry on
the list. If another thread dequeues the only entry on the list during
this window we crash because list_first_entry expects a non-empty
list, like so:
[ 0.133127] BUG: unable to handle kernel NULL pointer dereference at 00000008
[ 0.133132] IP: [<c12aae71>] net_tx_build_mops+0x91/0xa70
[ 0.133142] *pdpt = 0000000000000000 *pde = 000000000000000f
[ 0.133147] Oops: 0002 1 SMP
[ 0.133150] last sysfs file:
[ 0.133152] Modules linked in:
[ 0.133154]
[ 0.133156] Pid: 55, comm: netback/1 Not tainted (2.6.32.12-0.7.1 #1) Latitude E4310
[ 0.133158] EIP: 0061:[<c12aae71>] EFLAGS: 00010202 CPU: 1
[ 0.133161] EIP is at net_tx_build_mops+0x91/0xa70
[ 0.133163] EAX: 00000012 EBX: 00000008 ECX: e112b734 EDX: e112b76c
[ 0.133165] ESI: ffffff30 EDI: 00000000 EBP: e112b734 ESP: dfe85d98
[ 0.133167] DS: 007b ES: 007b FS: 00d8 GS: 0000 SS: 0069
[ 0.133169] Process netback/1 (pid: 55, ti=dfe84000 task=dfe83340 task.ti=dfe84000)
[ 0.133170] Stack:
[ 0.133172] 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000
[ 0.133177] <0> 00000000 e112b734 e112ec08 e112b7f8 e112ec08 ffffff30 00000000 00000000
[ 0.133186] <0> 00000000 00000000 00000000 e112b76c dfe85df4 00000001 00000000 aaaaaaaa
[ 0.133193] Call Trace:
[ 0.133202] [<c12abc7f>] net_tx_action+0x42f/0xac0
[ 0.133206] [<c12ac37a>] netbk_action_thread+0x6a/0x1b0
[ 0.133212] [<c1057444>] kthread+0x74/0x80
[ 0.133218] [<c10049d7>] kernel_thread_helper+0x7/0x10
[ 0.133220] Code: c4 00 00 00 89 74 24 58 39 74 24 2c 0f 84 c7 06 00 00 8b 74 24 \
58 8b 5c 24 58 81 ee d0 00 00 00 83 c3 08 89 74 24 34 8b 7c 24 \
58 <f0> ff 47 08 89 f0 e8 b4 f9 ff ff 8b 46 2c 8b 56 34 89 44 24 5c
[ 0.133261] EIP: [<c12aae71>] net_tx_build_mops+0x91/0xa70 SS:ESP 0069:dfe85d98
[ 0.133265] CR2: 0000000000000008
[ 0.133274] --[ end trace e2c5c15f54bd9d93 ]--
Therefore, after the initial lock-free check for an empty list, check
again with the lock held before dequeueing the entry.
Based on a patch by Tomasz Wroblewski.
Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
Cc: Tomasz Wroblewski <tomasz.wroblewski@citrix.com>
---
drivers/xen/netback/netback.c | 35 ++++++++++++++++++++++++++++-------
1 files changed, 28 insertions(+), 7 deletions(-)
diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
index 1d080f6..3b03435 100644
--- a/drivers/xen/netback/netback.c
+++ b/drivers/xen/netback/netback.c
@@ -774,15 +774,34 @@ static int __on_net_schedule_list(struct xen_netif *netif)
return !list_empty(&netif->list);
}
+/* Must be called with net_schedule_list_lock held */
static void remove_from_net_schedule_list(struct xen_netif *netif)
{
- struct xen_netbk *netbk = &xen_netbk[netif->group];
- spin_lock_irq(&netbk->net_schedule_list_lock);
if (likely(__on_net_schedule_list(netif))) {
list_del_init(&netif->list);
netif_put(netif);
}
+}
+
+static struct xen_netif *poll_net_schedule_list(struct xen_netbk *netbk)
+{
+ struct xen_netif *netif = NULL;
+
+ spin_lock_irq(&netbk->net_schedule_list_lock);
+ if (list_empty(&netbk->net_schedule_list))
+ goto out;
+
+ netif = list_first_entry(&netbk->net_schedule_list,
+ struct xen_netif, list);
+ if (!netif)
+ goto out;
+
+ netif_get(netif);
+
+ remove_from_net_schedule_list(netif);
+out:
spin_unlock_irq(&netbk->net_schedule_list_lock);
+ return netif;
}
static void add_to_net_schedule_list_tail(struct xen_netif *netif)
@@ -817,7 +836,10 @@ void netif_schedule_work(struct xen_netif *netif)
void netif_deschedule_work(struct xen_netif *netif)
{
+ struct xen_netbk *netbk = &xen_netbk[netif->group];
+ spin_lock_irq(&netbk->net_schedule_list_lock);
remove_from_net_schedule_list(netif);
+ spin_unlock_irq(&netbk->net_schedule_list_lock);
}
@@ -1301,12 +1323,11 @@ static unsigned net_tx_build_mops(struct xen_netbk *netbk)
int work_to_do;
unsigned int data_len;
pending_ring_idx_t index;
-
+
/* Get a netif from the list with work to do. */
- netif = list_first_entry(&netbk->net_schedule_list,
- struct xen_netif, list);
- netif_get(netif);
- remove_from_net_schedule_list(netif);
+ netif = poll_net_schedule_list(netbk);
+ if (!netif)
+ continue;
RING_FINAL_CHECK_FOR_REQUESTS(&netif->tx, work_to_do);
if (!work_to_do) {
--
1.7.4
From 6ad4931abe4b111253da13805cc504cc72b0df1c Mon Sep 17 00:00:00 2001
From: Paul Durrant <paul.durrant@citrix.com>
Date: Wed, 15 Dec 2010 09:48:12 +0000
Subject: [PATCH 118/197] xen: netback: Re-define PKT_PROT_LEN to be bigger.
Re-define PKT_PROT_LEN to be big enough to handle maximal IPv4 and TCP options and phrase
the definition so that it's reasonably obvious that's what it's for.
Signed-off-by: Paul Durrant <paul.durrant@citrix.com>
Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
---
drivers/xen/netback/netback.c | 14 +++++++++-----
1 files changed, 9 insertions(+), 5 deletions(-)
diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
index 3b03435..9bbd230 100644
--- a/drivers/xen/netback/netback.c
+++ b/drivers/xen/netback/netback.c
@@ -36,9 +36,11 @@
#include "common.h"
-#include <linux/tcp.h>
-#include <linux/udp.h>
#include <linux/kthread.h>
+#include <linux/if_vlan.h>
+#include <linux/udp.h>
+
+#include <net/tcp.h>
#include <xen/balloon.h>
#include <xen/events.h>
@@ -125,10 +127,12 @@ static inline int netif_get_page_ext(struct page *pg, unsigned int *_group, unsi
/*
* This is the amount of packet we copy rather than map, so that the
* guest can't fiddle with the contents of the headers while we do
- * packet processing on them (netfilter, routing, etc). 72 is enough
- * to cover TCP+IP headers including options.
+ * packet processing on them (netfilter, routing, etc).
*/
-#define PKT_PROT_LEN 72
+#define PKT_PROT_LEN (ETH_HLEN + \
+ VLAN_HLEN + \
+ sizeof(struct iphdr) + MAX_IPOPTLEN + \
+ sizeof(struct tcphdr) + MAX_TCP_OPTION_SPACE)
static inline pending_ring_idx_t pending_index(unsigned i)
{
--
1.7.4
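
With the usual values of those macros (ETH_HLEN 14, VLAN_HLEN 4, a 20-byte
IPv4 header plus up to 40 bytes of options, a 20-byte TCP header plus up to
40 bytes of options) the new definition comes to 138 bytes, versus the old
hard-coded 72. A tiny stand-alone check of that arithmetic; the numbers below
are the common values of those constants, not taken from this patch:

    #include <stdio.h>

    int main(void)
    {
        unsigned eth = 14, vlan = 4;      /* ETH_HLEN, VLAN_HLEN                         */
        unsigned ip  = 20, ip_opts  = 40; /* sizeof(struct iphdr), MAX_IPOPTLEN          */
        unsigned tcp = 20, tcp_opts = 40; /* sizeof(struct tcphdr), MAX_TCP_OPTION_SPACE */

        printf("PKT_PROT_LEN = %u bytes\n",
               eth + vlan + ip + ip_opts + tcp + tcp_opts); /* 138 */
        return 0;
    }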
From a2d629a773aba2049106bad81596ef88e80a9cd4 Mon Sep 17 00:00:00 2001
From: Paul Durrant <paul.durrant@citrix.com>
Date: Tue, 14 Dec 2010 20:35:19 +0000
Subject: [PATCH 119/197] xen: netback: Don't count packets we don't actually receive.
Make sure we only bump rx_packets when we're definitely going to call netif_rx_ni().
Signed-off-by: Paul Durrant <paul.durrant@citrix.com>
Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
---
drivers/xen/netback/netback.c | 6 +++---
1 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
index 9bbd230..78d3509 100644
--- a/drivers/xen/netback/netback.c
+++ b/drivers/xen/netback/netback.c
@@ -1512,9 +1512,6 @@ static void net_tx_submit(struct xen_netbk *netbk)
skb->dev = netif->dev;
skb->protocol = eth_type_trans(skb, skb->dev);
- netif->stats.rx_bytes += skb->len;
- netif->stats.rx_packets++;
-
if (skb->ip_summed == CHECKSUM_PARTIAL) {
if (skb_checksum_setup(skb)) {
DPRINTK("Can't setup checksum in net_tx_action\n");
@@ -1530,6 +1527,9 @@ static void net_tx_submit(struct xen_netbk *netbk)
continue;
}
+ netif->stats.rx_bytes += skb->len;
+ netif->stats.rx_packets++;
+
netif_rx_ni(skb);
netif->dev->last_rx = jiffies;
}
--
1.7.4
From c6efc62e71720df66d9a91d33a3de813f0ec41c2 Mon Sep 17 00:00:00 2001
From: Paul Durrant <paul.durrant@citrix.com>
Date: Tue, 14 Dec 2010 20:35:20 +0000
Subject: [PATCH 120/197] xen: netback: Remove the 500ms timeout to restart the netif queue.
It is generally unhelpful as it results in a massive tail-drop should a guest become
unresponsive for a relatively short period of time, and no back-pressure (other than
that caused by a higher-layer protocol) is applied to the sender.
Signed-off-by: Paul Durrant <paul.durrant@citrix.com>
Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
---
drivers/xen/netback/netback.c | 20 +-------------------
1 files changed, 1 insertions(+), 19 deletions(-)
diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
index 78d3509..2caa5f8 100644
--- a/drivers/xen/netback/netback.c
+++ b/drivers/xen/netback/netback.c
@@ -275,13 +275,6 @@ static inline int netbk_queue_full(struct xen_netif *netif)
((netif->rx.rsp_prod_pvt + NET_RX_RING_SIZE - peek) < needed);
}
-static void tx_queue_callback(unsigned long data)
-{
- struct xen_netif *netif = (struct xen_netif *)data;
- if (netif_schedulable(netif))
- netif_wake_queue(netif->dev);
-}
-
/* Figure out how many ring slots we're going to need to send @skb to
the guest. */
static unsigned count_skb_slots(struct sk_buff *skb, struct xen_netif *netif)
@@ -364,19 +357,8 @@ int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev)
netif->rx.sring->req_event = netif->rx_req_cons_peek +
netbk_max_required_rx_slots(netif);
mb(); /* request notification /then/ check & stop the queue */
- if (netbk_queue_full(netif)) {
+ if (netbk_queue_full(netif))
netif_stop_queue(dev);
- /*
- * Schedule 500ms timeout to restart the queue, thus
- * ensuring that an inactive queue will be drained.
- * Packets will be immediately be dropped until more
- * receive buffers become available (see
- * netbk_queue_full() check above).
- */
- netif->tx_queue_timeout.data = (unsigned long)netif;
- netif->tx_queue_timeout.function = tx_queue_callback;
- mod_timer(&netif->tx_queue_timeout, jiffies + HZ/2);
- }
}
skb_queue_tail(&netbk->rx_queue, skb);
--
1.7.4
From 0e8da97315f8cc89611f9194097931df4e67efc7 Mon Sep 17 00:00:00 2001
From: Paul Durrant <paul.durrant@citrix.com>
Date: Tue, 14 Dec 2010 20:35:21 +0000
Subject: [PATCH 121/197] xen: netback: Add a missing test to tx_work_todo.
Add a test so that, when netback is using worker threads, net_tx_action()
gets called in a timely manner when the pending_inuse list is populated.
Signed-off-by: Paul Durrant <paul.durrant@citrix.com>
Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
---
drivers/xen/netback/netback.c | 4 ++++
1 files changed, 4 insertions(+), 0 deletions(-)
diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
index 2caa5f8..dd52d01 100644
--- a/drivers/xen/netback/netback.c
+++ b/drivers/xen/netback/netback.c
@@ -1695,6 +1695,10 @@ static inline int tx_work_todo(struct xen_netbk *netbk)
if (netbk->dealloc_cons != netbk->dealloc_prod)
return 1;
+ if (netbk_copy_skb_mode == NETBK_DELAYED_COPY_SKB &&
+ !list_empty(&netbk->pending_inuse_head))
+ return 1;
+
if (((nr_pending_reqs(netbk) + MAX_SKB_FRAGS) < MAX_PENDING_REQS) &&
!list_empty(&netbk->net_schedule_list))
return 1;
--
1.7.4
From e2f4dacefdb6cdff5e4e0b380632ff7ca750ba8b Mon Sep 17 00:00:00 2001
From: Paul Durrant <paul.durrant@citrix.com>
Date: Tue, 14 Dec 2010 20:35:22 +0000
Subject: [PATCH 122/197] xen: netback: Re-factor net_tx_action_dealloc() slightly.
There is no need for processing of the pending_inuse list to be within the dealloc_prod/cons
loop.
Signed-off-by: Paul Durrant <paul.durrant@citrix.com>
Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
---
drivers/xen/netback/netback.c | 26 ++++++++++++++------------
1 files changed, 14 insertions(+), 12 deletions(-)
diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
index dd52d01..53b3a0e 100644
--- a/drivers/xen/netback/netback.c
+++ b/drivers/xen/netback/netback.c
@@ -906,11 +906,20 @@ static inline void net_tx_action_dealloc(struct xen_netbk *netbk)
gop++;
}
- if (netbk_copy_skb_mode != NETBK_DELAYED_COPY_SKB ||
- list_empty(&netbk->pending_inuse_head))
- break;
+ } while (dp != netbk->dealloc_prod);
+
+ netbk->dealloc_cons = dc;
- /* Copy any entries that have been pending for too long. */
+ ret = HYPERVISOR_grant_table_op(
+ GNTTABOP_unmap_grant_ref, netbk->tx_unmap_ops,
+ gop - netbk->tx_unmap_ops);
+ BUG_ON(ret);
+
+ /*
+ * Copy any entries that have been pending for too long
+ */
+ if (netbk_copy_skb_mode == NETBK_DELAYED_COPY_SKB &&
+ !list_empty(&netbk->pending_inuse_head)) {
list_for_each_entry_safe(inuse, n,
&netbk->pending_inuse_head, list) {
struct pending_tx_info *pending_tx_info;
@@ -936,14 +945,7 @@ static inline void net_tx_action_dealloc(struct xen_netbk *netbk)
break;
}
- } while (dp != netbk->dealloc_prod);
-
- netbk->dealloc_cons = dc;
-
- ret = HYPERVISOR_grant_table_op(
- GNTTABOP_unmap_grant_ref, netbk->tx_unmap_ops,
- gop - netbk->tx_unmap_ops);
- BUG_ON(ret);
+ }
list_for_each_entry_safe(inuse, n, &list, list) {
struct pending_tx_info *pending_tx_info;
--
1.7.4
From 27e6a8538a7e781f4774e4746f67eb113996333d Mon Sep 17 00:00:00 2001
From: Ian Campbell <ian.campbell@citrix.com>
Date: Wed, 19 Jan 2011 12:43:38 +0000
Subject: [PATCH 124/197] xen: netback: completely remove tx_queue_timer
"xen: netback: Remove the 500ms timeout to restart the netif queue." missed
removing the timer initialisation.
Also remove the related comment which has been obsolete since the default for
MODPARM_copy_skb was switched to true some time ago.
Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
Cc: Paul Durrant <Paul.Durrant@citrix.com>
---
drivers/xen/netback/common.h | 3 ---
drivers/xen/netback/interface.c | 13 +------------
2 files changed, 1 insertions(+), 15 deletions(-)
diff --git a/drivers/xen/netback/common.h b/drivers/xen/netback/common.h
index ce0041a..7e03a46 100644
--- a/drivers/xen/netback/common.h
+++ b/drivers/xen/netback/common.h
@@ -99,9 +99,6 @@ struct xen_netif {
unsigned long remaining_credit;
struct timer_list credit_timeout;
- /* Enforce draining of the transmit queue. */
- struct timer_list tx_queue_timeout;
-
/* Statistics */
int nr_copied_skbs;
diff --git a/drivers/xen/netback/interface.c b/drivers/xen/netback/interface.c
index 2e8508a..efdc21c 100644
--- a/drivers/xen/netback/interface.c
+++ b/drivers/xen/netback/interface.c
@@ -41,15 +41,7 @@
* Module parameter 'queue_length':
*
* Enables queuing in the network stack when a client has run out of receive
- * descriptors. Although this feature can improve receive bandwidth by avoiding
- * packet loss, it can also result in packets sitting in the 'tx_queue' for
- * unbounded time. This is bad if those packets hold onto foreign resources.
- * For example, consider a packet that holds onto resources belonging to the
- * guest for which it is queued (e.g., packet received on vif1.0, destined for
- * vif1.1 which is not activated in the guest): in this situation the guest
- * will never be destroyed, unless vif1.1 is taken down. To avoid this, we
- * run a timer (tx_queue_timeout) to drain the queue when the interface is
- * blocked.
+ * descriptors.
*/
static unsigned long netbk_queue_length = 32;
module_param_named(queue_length, netbk_queue_length, ulong, 0644);
@@ -295,8 +287,6 @@ struct xen_netif *netif_alloc(struct device *parent, domid_t domid, unsigned int
/* Initialize 'expires' now: it's used to track the credit window. */
netif->credit_timeout.expires = jiffies;
- init_timer(&netif->tx_queue_timeout);
-
dev->netdev_ops = &netback_ops;
netif_set_features(netif);
SET_ETHTOOL_OPS(dev, &network_ethtool_ops);
@@ -458,7 +448,6 @@ void netif_disconnect(struct xen_netif *netif)
wait_event(netif->waiting_to_free, atomic_read(&netif->refcnt) == 0);
del_timer_sync(&netif->credit_timeout);
- del_timer_sync(&netif->tx_queue_timeout);
if (netif->irq)
unbind_from_irqhandler(netif->irq, netif);
--
1.7.4
From 83381aa69cf38fc26125019479527e0710fe27cd Mon Sep 17 00:00:00 2001
From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Date: Wed, 15 Dec 2010 13:31:03 -0500
Subject: [PATCH 133/197] ttm: Set VM_IO only on pages with TTM_MEMTYPE_FLAG_FIXED set.
This patch is based on "[Patch RFC] ttm: nouveau accelerated on Xen
pv-ops kernel"
http://lists.freedesktop.org/archives/nouveau/2010-March/005326.html
Under Xen, the PFN of a page is virtualized. The physical addresses used
for DMA programming need to be Machine Frame Numbers (MFNs).
Xen transparently does the correct translation using the _PAGE_IOMAP
PTE bit. If the bit is set, Xen assumes that the backing memory is in
the IOMEM space, and PFN equals MFN. If not set, page_to_pfn() returns
a phantom MFN.
The patch enables the ttm_bo_vm_fault() handler to behave correctly
under Xen, and has no side-effects on normal (not under Xen) operations.
The use of TTM_MEMTYPE_FLAG_FIXED in the check assumes that
only pages which have this flag are backed by device memory or IO.
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Signed-off-by: Arvind R <arvino55@gmail.com>
Conflicts:
drivers/gpu/drm/ttm/ttm_bo_vm.c
---
drivers/gpu/drm/ttm/ttm_bo_vm.c | 7 ++++++-
1 files changed, 6 insertions(+), 1 deletions(-)
diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c
index 221b924..bb24374 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_vm.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c
@@ -247,6 +247,7 @@ int ttm_bo_mmap(struct file *filp, struct vm_area_struct *vma,
{
struct ttm_bo_driver *driver;
struct ttm_buffer_object *bo;
+ struct ttm_mem_type_manager *man;
int ret;
read_lock(&bdev->vm_lock);
@@ -279,7 +280,11 @@ int ttm_bo_mmap(struct file *filp, struct vm_area_struct *vma,
*/
vma->vm_private_data = bo;
- vma->vm_flags |= VM_RESERVED | VM_IO | VM_MIXEDMAP | VM_DONTEXPAND;
+ vma->vm_flags |= VM_RESERVED | VM_MIXEDMAP | VM_DONTEXPAND;
+ man = &bdev->man[bo->mem.mem_type];
+ if (man->flags & TTM_MEMTYPE_FLAG_FIXED)
+ vma->vm_flags |= VM_IO;
+ vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);
return 0;
out_unref:
ttm_bo_unref(&bo);
--
1.7.4
From 9c2e85765d147fc77ae27cb81a7091942f22a584 Mon Sep 17 00:00:00 2001
From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Date: Wed, 15 Dec 2010 13:32:15 -0500
Subject: [PATCH 134/197] drm: recompute vma->vm_page_prot after changing vm_flags
vm_get_page_prot() computes vm_page_prot depending on vm_flags, so
we need to re-call it if we change flags.
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Conflicts:
drivers/gpu/drm/ttm/ttm_bo_vm.c
---
drivers/gpu/drm/ttm/ttm_bo_vm.c | 1 +
1 files changed, 1 insertions(+), 0 deletions(-)
diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c
index bb24374..735dc1d 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_vm.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c
@@ -300,6 +300,7 @@ int ttm_fbdev_mmap(struct vm_area_struct *vma, struct ttm_buffer_object *bo)
vma->vm_ops = &ttm_bo_vm_ops;
vma->vm_private_data = ttm_bo_reference(bo);
vma->vm_flags |= VM_RESERVED | VM_IO | VM_MIXEDMAP | VM_DONTEXPAND;
+ vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);
return 0;
}
EXPORT_SYMBOL(ttm_fbdev_mmap);
--
1.7.4
From 74632f8e51618dc31beba712d03dd0f1168cc241 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Date: Fri, 20 Feb 2009 15:58:42 -0500
Subject: [PATCH 135/197] x86: define arch_vm_get_page_prot to set _PAGE_IOMAP on VM_IO vmas
Set _PAGE_IOMAP in ptes mapping a VM_IO vma. This says that the mapping
is of a real piece of physical hardware, and not just system memory.
Xen, in particular, uses this to inhibit the pfn->mfn conversion that
would otherwise happen - in other words, the address is treated directly
as a machine physical address without converting it from pseudo-physical.
[ Impact: make VM_IO mappings map the right thing under Xen ]
[ v2: rebased on v2.6.37-rc1]
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
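For reference, the generic vm_get_page_prot() in mm/mmap.c is expected
to OR these arch-specific bits into the protection derived from
protection_map, roughly:

	pgprot_t vm_get_page_prot(unsigned long vm_flags)
	{
		return __pgprot(pgprot_val(protection_map[vm_flags &
				(VM_READ|VM_WRITE|VM_EXEC|VM_SHARED)]) |
				pgprot_val(arch_vm_get_page_prot(vm_flags)));
	}

so any pte set up for a VM_IO mapping picks up _PAGE_IOMAP.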
---
arch/x86/include/asm/pgtable.h | 3 +++
arch/x86/mm/pgtable.c | 10 ++++++++++
2 files changed, 13 insertions(+), 0 deletions(-)
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 18601c8..284ee01 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -485,6 +485,9 @@ static inline unsigned long pages_to_mb(unsigned long npg)
#define io_remap_pfn_range(vma, vaddr, pfn, size, prot) \
remap_pfn_range(vma, vaddr, pfn, size, prot)
+#define arch_vm_get_page_prot arch_vm_get_page_prot
+extern pgprot_t arch_vm_get_page_prot(unsigned vm_flags);
+
#if PAGETABLE_LEVELS > 2
static inline int pud_none(pud_t pud)
{
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index 500242d..1e72207 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -15,6 +15,16 @@
gfp_t __userpte_alloc_gfp = PGALLOC_GFP | PGALLOC_USER_GFP;
+pgprot_t arch_vm_get_page_prot(unsigned vm_flags)
+{
+ pgprot_t ret = __pgprot(0);
+
+ if (vm_flags & VM_IO)
+ ret = __pgprot(_PAGE_IOMAP);
+
+ return ret;
+}
+
pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
{
return (pte_t *)__get_free_page(PGALLOC_GFP);
--
1.7.4
From 81ec0e742ce919124909640039c05baa29b1568a Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Date: Wed, 8 Dec 2010 11:03:27 -0800
Subject: [PATCH 136/197] mm: remove unused "token" argument from apply_to_page_range callback.
The argument is basically the struct page of the pte_t * passed into
the callback. But there's no need to pass that, since it can be fairly
easily derived from the pte_t * itself if needed (and no current users
need to do that anyway).
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
arch/x86/xen/grant-table.c | 6 ++----
arch/x86/xen/mmu.c | 3 +--
include/linux/mm.h | 3 +--
mm/memory.c | 2 +-
mm/vmalloc.c | 2 +-
5 files changed, 6 insertions(+), 10 deletions(-)
diff --git a/arch/x86/xen/grant-table.c b/arch/x86/xen/grant-table.c
index 49ba9b5..5bf892a 100644
--- a/arch/x86/xen/grant-table.c
+++ b/arch/x86/xen/grant-table.c
@@ -44,8 +44,7 @@
#include <asm/pgtable.h>
-static int map_pte_fn(pte_t *pte, struct page *pmd_page,
- unsigned long addr, void *data)
+static int map_pte_fn(pte_t *pte, unsigned long addr, void *data)
{
unsigned long **frames = (unsigned long **)data;
@@ -54,8 +53,7 @@ static int map_pte_fn(pte_t *pte, struct page *pmd_page,
return 0;
}
-static int unmap_pte_fn(pte_t *pte, struct page *pmd_page,
- unsigned long addr, void *data)
+static int unmap_pte_fn(pte_t *pte, unsigned long addr, void *data)
{
set_pte_at(&init_mm, addr, pte, __pte(0));
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 5e92b61..38ba804 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -2292,8 +2292,7 @@ struct remap_data {
struct mmu_update *mmu_update;
};
-static int remap_area_mfn_pte_fn(pte_t *ptep, pgtable_t token,
- unsigned long addr, void *data)
+static int remap_area_mfn_pte_fn(pte_t *ptep, unsigned long addr, void *data)
{
struct remap_data *rmd = data;
pte_t pte = pte_mkspecial(pfn_pte(rmd->mfn++, rmd->prot));
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 956a355..bb898ec 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1529,8 +1529,7 @@ struct page *follow_page(struct vm_area_struct *, unsigned long address,
#define FOLL_MLOCK 0x40 /* mark page as mlocked */
#define FOLL_SPLIT 0x80 /* don't return transhuge pages, split them */
-typedef int (*pte_fn_t)(pte_t *pte, pgtable_t token, unsigned long addr,
- void *data);
+typedef int (*pte_fn_t)(pte_t *pte, unsigned long addr, void *data);
extern int apply_to_page_range(struct mm_struct *mm, unsigned long address,
unsigned long size, pte_fn_t fn, void *data);
diff --git a/mm/memory.c b/mm/memory.c
index 31250fa..740470c 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2032,7 +2032,7 @@ static int apply_to_pte_range(struct mm_struct *mm, pmd_t *pmd,
token = pmd_pgtable(*pmd);
do {
- err = fn(pte++, token, addr, data);
+ err = fn(pte++, addr, data);
if (err)
break;
} while (addr += PAGE_SIZE, addr != end);
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index f9b1667..5ddbdfe 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -2061,7 +2061,7 @@ void __attribute__((weak)) vmalloc_sync_all(void)
}
-static int f(pte_t *pte, pgtable_t table, unsigned long addr, void *data)
+static int f(pte_t *pte, unsigned long addr, void *data)
{
/* apply_to_page_range() does all the hard work. */
return 0;
--
1.7.4
From 7f635db45f8e921c9203fdfb904d0095b7af6480 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Date: Tue, 30 Nov 2010 10:03:44 -0800
Subject: [PATCH 137/197] mm: add apply_to_page_range_batch()
apply_to_page_range() calls its callback function once for each pte, which
is pretty inefficient since it will almost always be operating on a batch
of adjacent ptes. apply_to_page_range_batch() calls its callback
with both a pte_t * and a count, so it can operate on multiple ptes at
once.
The callback is expected to handle all its ptes, or return an error. For
both apply_to_page_range and apply_to_page_range_batch, it is up to
the caller to work out how much progress was made if either fails with
an error.
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
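For example, a hypothetical batch callback that clears a run of ptes
might look like this; the callback walks all 'count' entries itself:

	static int clear_ptes_batch(pte_t *pte, unsigned count,
				    unsigned long addr, void *data)
	{
		while (count--) {
			pte_clear(&init_mm, addr, pte++);
			addr += PAGE_SIZE;
		}
		return 0;
	}

and would be invoked via:

	apply_to_page_range_batch(&init_mm, start, size,
				  clear_ptes_batch, NULL);

(clear_ptes_batch, start and size are placeholder names used only for
illustration.)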
---
include/linux/mm.h | 6 +++++
mm/memory.c | 57 +++++++++++++++++++++++++++++++++++++--------------
2 files changed, 47 insertions(+), 16 deletions(-)
diff --git a/include/linux/mm.h b/include/linux/mm.h
index bb898ec..5a32a8a 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1533,6 +1533,12 @@ typedef int (*pte_fn_t)(pte_t *pte, unsigned long addr, void *data);
extern int apply_to_page_range(struct mm_struct *mm, unsigned long address,
unsigned long size, pte_fn_t fn, void *data);
+typedef int (*pte_batch_fn_t)(pte_t *pte, unsigned count,
+ unsigned long addr, void *data);
+extern int apply_to_page_range_batch(struct mm_struct *mm,
+ unsigned long address, unsigned long size,
+ pte_batch_fn_t fn, void *data);
+
#ifdef CONFIG_PROC_FS
void vm_stat_account(struct mm_struct *, unsigned long, struct file *, long);
#else
diff --git a/mm/memory.c b/mm/memory.c
index 740470c..496e4e6 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2012,11 +2012,10 @@ EXPORT_SYMBOL(remap_pfn_range);
static int apply_to_pte_range(struct mm_struct *mm, pmd_t *pmd,
unsigned long addr, unsigned long end,
- pte_fn_t fn, void *data)
+ pte_batch_fn_t fn, void *data)
{
pte_t *pte;
int err;
- pgtable_t token;
spinlock_t *uninitialized_var(ptl);
pte = (mm == &init_mm) ?
@@ -2028,25 +2027,17 @@ static int apply_to_pte_range(struct mm_struct *mm, pmd_t *pmd,
BUG_ON(pmd_huge(*pmd));
arch_enter_lazy_mmu_mode();
-
- token = pmd_pgtable(*pmd);
-
- do {
- err = fn(pte++, addr, data);
- if (err)
- break;
- } while (addr += PAGE_SIZE, addr != end);
-
+ err = fn(pte, (end - addr) / PAGE_SIZE, addr, data);
arch_leave_lazy_mmu_mode();
if (mm != &init_mm)
- pte_unmap_unlock(pte-1, ptl);
+ pte_unmap_unlock(pte, ptl);
return err;
}
static int apply_to_pmd_range(struct mm_struct *mm, pud_t *pud,
unsigned long addr, unsigned long end,
- pte_fn_t fn, void *data)
+ pte_batch_fn_t fn, void *data)
{
pmd_t *pmd;
unsigned long next;
@@ -2068,7 +2059,7 @@ static int apply_to_pmd_range(struct mm_struct *mm, pud_t *pud,
static int apply_to_pud_range(struct mm_struct *mm, pgd_t *pgd,
unsigned long addr, unsigned long end,
- pte_fn_t fn, void *data)
+ pte_batch_fn_t fn, void *data)
{
pud_t *pud;
unsigned long next;
@@ -2090,8 +2081,9 @@ static int apply_to_pud_range(struct mm_struct *mm, pgd_t *pgd,
* Scan a region of virtual memory, filling in page tables as necessary
* and calling a provided function on each leaf page table.
*/
-int apply_to_page_range(struct mm_struct *mm, unsigned long addr,
- unsigned long size, pte_fn_t fn, void *data)
+int apply_to_page_range_batch(struct mm_struct *mm,
+ unsigned long addr, unsigned long size,
+ pte_batch_fn_t fn, void *data)
{
pgd_t *pgd;
unsigned long next;
@@ -2109,6 +2101,39 @@ int apply_to_page_range(struct mm_struct *mm, unsigned long addr,
return err;
}
+EXPORT_SYMBOL_GPL(apply_to_page_range_batch);
+
+struct pte_single_fn
+{
+ pte_fn_t fn;
+ void *data;
+};
+
+static int apply_pte_batch(pte_t *pte, unsigned count,
+ unsigned long addr, void *data)
+{
+ struct pte_single_fn *single = data;
+ int err = 0;
+
+ while (count--) {
+ err = single->fn(pte, addr, single->data);
+ if (err)
+ break;
+
+ addr += PAGE_SIZE;
+ pte++;
+ }
+
+ return err;
+}
+
+int apply_to_page_range(struct mm_struct *mm, unsigned long addr,
+ unsigned long size, pte_fn_t fn, void *data)
+{
+ struct pte_single_fn single = { .fn = fn, .data = data };
+ return apply_to_page_range_batch(mm, addr, size,
+ apply_pte_batch, &single);
+}
EXPORT_SYMBOL_GPL(apply_to_page_range);
/*
--
1.7.4
From dea51c0fa49689f6a489205c00ebf83c8e78f6cd Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Date: Mon, 6 Dec 2010 12:26:53 -0800
Subject: [PATCH 138/197] ioremap: use apply_to_page_range_batch() for ioremap_page_range()
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
lib/ioremap.c | 85 +++++++++++++++------------------------------------------
1 files changed, 22 insertions(+), 63 deletions(-)
diff --git a/lib/ioremap.c b/lib/ioremap.c
index da4e2ad..e75d0d1 100644
--- a/lib/ioremap.c
+++ b/lib/ioremap.c
@@ -13,81 +13,40 @@
#include <asm/cacheflush.h>
#include <asm/pgtable.h>
-static int ioremap_pte_range(pmd_t *pmd, unsigned long addr,
- unsigned long end, phys_addr_t phys_addr, pgprot_t prot)
+struct ioremap_data
{
- pte_t *pte;
+ phys_addr_t phys_addr;
+ pgprot_t prot;
+};
+
+static int ioremap_pte_range(pte_t *pte, unsigned count,
+ unsigned long addr, void *v)
+{
+ struct ioremap_data *data = v;
u64 pfn;
- pfn = phys_addr >> PAGE_SHIFT;
- pte = pte_alloc_kernel(pmd, addr);
- if (!pte)
- return -ENOMEM;
- do {
- BUG_ON(!pte_none(*pte));
- set_pte_at(&init_mm, addr, pte, pfn_pte(pfn, prot));
- pfn++;
- } while (pte++, addr += PAGE_SIZE, addr != end);
- return 0;
-}
+ pfn = data->phys_addr >> PAGE_SHIFT;
+ data->phys_addr += count * PAGE_SIZE;
-static inline int ioremap_pmd_range(pud_t *pud, unsigned long addr,
- unsigned long end, phys_addr_t phys_addr, pgprot_t prot)
-{
- pmd_t *pmd;
- unsigned long next;
+ while (count--) {
+ BUG_ON(!pte_none(*pte));
- phys_addr -= addr;
- pmd = pmd_alloc(&init_mm, pud, addr);
- if (!pmd)
- return -ENOMEM;
- do {
- next = pmd_addr_end(addr, end);
- if (ioremap_pte_range(pmd, addr, next, phys_addr + addr, prot))
- return -ENOMEM;
- } while (pmd++, addr = next, addr != end);
- return 0;
-}
+ set_pte_at(&init_mm, addr, pte++, pfn_pte(pfn++, data->prot));
-static inline int ioremap_pud_range(pgd_t *pgd, unsigned long addr,
- unsigned long end, phys_addr_t phys_addr, pgprot_t prot)
-{
- pud_t *pud;
- unsigned long next;
+ addr += PAGE_SIZE;
+ }
- phys_addr -= addr;
- pud = pud_alloc(&init_mm, pgd, addr);
- if (!pud)
- return -ENOMEM;
- do {
- next = pud_addr_end(addr, end);
- if (ioremap_pmd_range(pud, addr, next, phys_addr + addr, prot))
- return -ENOMEM;
- } while (pud++, addr = next, addr != end);
return 0;
}
-int ioremap_page_range(unsigned long addr,
- unsigned long end, phys_addr_t phys_addr, pgprot_t prot)
+int ioremap_page_range(unsigned long addr, unsigned long end,
+ phys_addr_t phys_addr, pgprot_t prot)
{
- pgd_t *pgd;
- unsigned long start;
- unsigned long next;
- int err;
-
- BUG_ON(addr >= end);
-
- start = addr;
- phys_addr -= addr;
- pgd = pgd_offset_k(addr);
- do {
- next = pgd_addr_end(addr, end);
- err = ioremap_pud_range(pgd, addr, next, phys_addr+addr, prot);
- if (err)
- break;
- } while (pgd++, addr = next, addr != end);
+ struct ioremap_data data = { .phys_addr = phys_addr, .prot = prot };
+ int err = apply_to_page_range_batch(&init_mm, addr, end - addr,
+ ioremap_pte_range, &data);
- flush_cache_vmap(start, end);
+ flush_cache_vmap(addr, end);
return err;
}
--
1.7.4
From 7a064a31021ba0b4adfc90061d7da2daa9b3d27e Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Date: Mon, 29 Nov 2010 12:22:24 -0800
Subject: [PATCH 139/197] vmalloc: use plain pte_clear() for unmaps
ptep_get_and_clear() is potentially moderately expensive (at least
an atomic operation, or potentially a trap-and-fault when virtualized)
so use a plain pte_clear().
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
mm/vmalloc.c | 3 ++-
1 files changed, 2 insertions(+), 1 deletions(-)
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 5ddbdfe..c06dc1e 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -39,8 +39,9 @@ static void vunmap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end)
pte = pte_offset_kernel(pmd, addr);
do {
- pte_t ptent = ptep_get_and_clear(&init_mm, addr, pte);
+ pte_t ptent = *pte;
WARN_ON(!pte_none(ptent) && !pte_present(ptent));
+ pte_clear(&init_mm, addr, pte);
} while (pte++, addr += PAGE_SIZE, addr != end);
}
--
1.7.4
From 334c14835ef823ce665eeebf6aad467064f47e47 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Date: Mon, 29 Nov 2010 11:06:19 -0800
Subject: [PATCH 140/197] vmalloc: use apply_to_page_range_batch() for vunmap_page_range()
There's no need to open-code it when there's a helpful utility function
to do the job.
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Cc: Nick Piggin <npiggin@kernel.dk>
---
mm/vmalloc.c | 53 +++++++++--------------------------------------------
1 files changed, 9 insertions(+), 44 deletions(-)
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index c06dc1e..e99aa3b 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -33,59 +33,24 @@
/*** Page table manipulation functions ***/
-static void vunmap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end)
+static int vunmap_pte(pte_t *pte, unsigned count,
+ unsigned long addr, void *data)
{
- pte_t *pte;
-
- pte = pte_offset_kernel(pmd, addr);
- do {
+ while (count--) {
pte_t ptent = *pte;
- WARN_ON(!pte_none(ptent) && !pte_present(ptent));
- pte_clear(&init_mm, addr, pte);
- } while (pte++, addr += PAGE_SIZE, addr != end);
-}
-
-static void vunmap_pmd_range(pud_t *pud, unsigned long addr, unsigned long end)
-{
- pmd_t *pmd;
- unsigned long next;
- pmd = pmd_offset(pud, addr);
- do {
- next = pmd_addr_end(addr, end);
- if (pmd_none_or_clear_bad(pmd))
- continue;
- vunmap_pte_range(pmd, addr, next);
- } while (pmd++, addr = next, addr != end);
-}
+ WARN_ON(!pte_none(ptent) && !pte_present(ptent));
-static void vunmap_pud_range(pgd_t *pgd, unsigned long addr, unsigned long end)
-{
- pud_t *pud;
- unsigned long next;
+ pte_clear(&init_mm, addr, pte++);
+ addr += PAGE_SIZE;
+ }
- pud = pud_offset(pgd, addr);
- do {
- next = pud_addr_end(addr, end);
- if (pud_none_or_clear_bad(pud))
- continue;
- vunmap_pmd_range(pud, addr, next);
- } while (pud++, addr = next, addr != end);
+ return 0;
}
static void vunmap_page_range(unsigned long addr, unsigned long end)
{
- pgd_t *pgd;
- unsigned long next;
-
- BUG_ON(addr >= end);
- pgd = pgd_offset_k(addr);
- do {
- next = pgd_addr_end(addr, end);
- if (pgd_none_or_clear_bad(pgd))
- continue;
- vunmap_pud_range(pgd, addr, next);
- } while (pgd++, addr = next, addr != end);
+ apply_to_page_range_batch(&init_mm, addr, end - addr, vunmap_pte, NULL);
}
static int vmap_pte_range(pmd_t *pmd, unsigned long addr,
--
1.7.4
From 937b74f8d19f7e62d63d4e82c2cf21f3bd636d9e Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Date: Mon, 29 Nov 2010 11:11:45 -0800
Subject: [PATCH 141/197] vmalloc: use apply_to_page_range_batch() for vmap_page_range_noflush()
There's no need to open-code it when there's a helpful utility
function.
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Cc: Nick Piggin <npiggin@kernel.dk>
---
mm/vmalloc.c | 92 ++++++++++++++++++---------------------------------------
1 files changed, 29 insertions(+), 63 deletions(-)
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index e99aa3b..cf4e705 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -53,63 +53,34 @@ static void vunmap_page_range(unsigned long addr, unsigned long end)
apply_to_page_range_batch(&init_mm, addr, end - addr, vunmap_pte, NULL);
}
-static int vmap_pte_range(pmd_t *pmd, unsigned long addr,
- unsigned long end, pgprot_t prot, struct page **pages, int *nr)
+struct vmap_data
{
- pte_t *pte;
+ struct page **pages;
+ unsigned index;
+ pgprot_t prot;
+};
- /*
- * nr is a running index into the array which helps higher level
- * callers keep track of where we're up to.
- */
+static int vmap_pte(pte_t *pte, unsigned count,
+ unsigned long addr, void *data)
+{
+ struct vmap_data *vmap = data;
- pte = pte_alloc_kernel(pmd, addr);
- if (!pte)
- return -ENOMEM;
- do {
- struct page *page = pages[*nr];
+ while (count--) {
+ struct page *page = vmap->pages[vmap->index];
if (WARN_ON(!pte_none(*pte)))
return -EBUSY;
+
if (WARN_ON(!page))
return -ENOMEM;
- set_pte_at(&init_mm, addr, pte, mk_pte(page, prot));
- (*nr)++;
- } while (pte++, addr += PAGE_SIZE, addr != end);
- return 0;
-}
-static int vmap_pmd_range(pud_t *pud, unsigned long addr,
- unsigned long end, pgprot_t prot, struct page **pages, int *nr)
-{
- pmd_t *pmd;
- unsigned long next;
-
- pmd = pmd_alloc(&init_mm, pud, addr);
- if (!pmd)
- return -ENOMEM;
- do {
- next = pmd_addr_end(addr, end);
- if (vmap_pte_range(pmd, addr, next, prot, pages, nr))
- return -ENOMEM;
- } while (pmd++, addr = next, addr != end);
- return 0;
-}
+ set_pte_at(&init_mm, addr, pte, mk_pte(page, vmap->prot));
-static int vmap_pud_range(pgd_t *pgd, unsigned long addr,
- unsigned long end, pgprot_t prot, struct page **pages, int *nr)
-{
- pud_t *pud;
- unsigned long next;
+ pte++;
+ addr += PAGE_SIZE;
+ vmap->index++;
+ }
- pud = pud_alloc(&init_mm, pgd, addr);
- if (!pud)
- return -ENOMEM;
- do {
- next = pud_addr_end(addr, end);
- if (vmap_pmd_range(pud, addr, next, prot, pages, nr))
- return -ENOMEM;
- } while (pud++, addr = next, addr != end);
return 0;
}
@@ -122,22 +93,17 @@ static int vmap_pud_range(pgd_t *pgd, unsigned long addr,
static int vmap_page_range_noflush(unsigned long start, unsigned long end,
pgprot_t prot, struct page **pages)
{
- pgd_t *pgd;
- unsigned long next;
- unsigned long addr = start;
- int err = 0;
- int nr = 0;
-
- BUG_ON(addr >= end);
- pgd = pgd_offset_k(addr);
- do {
- next = pgd_addr_end(addr, end);
- err = vmap_pud_range(pgd, addr, next, prot, pages, &nr);
- if (err)
- return err;
- } while (pgd++, addr = next, addr != end);
-
- return nr;
+ int err;
+ struct vmap_data vmap = {
+ .pages = pages,
+ .index = 0,
+ .prot = prot
+ };
+
+ err = apply_to_page_range_batch(&init_mm, start, end - start,
+ vmap_pte, &vmap);
+
+ return err ? err : vmap.index;
}
static int vmap_page_range(unsigned long start, unsigned long end,
--
1.7.4
From d4205306bb6609275ad93a8d1bfb4de3d06d0eb5 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Date: Wed, 1 Dec 2010 15:45:21 -0800
Subject: [PATCH 142/197] vmalloc: use apply_to_page_range_batch() in alloc_vm_area()
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
mm/vmalloc.c | 8 ++++----
1 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index cf4e705..64d395f 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -1993,9 +1993,9 @@ void __attribute__((weak)) vmalloc_sync_all(void)
}
-static int f(pte_t *pte, unsigned long addr, void *data)
+static int f(pte_t *pte, unsigned count, unsigned long addr, void *data)
{
- /* apply_to_page_range() does all the hard work. */
+ /* apply_to_page_range_batch() does all the hard work. */
return 0;
}
@@ -2024,8 +2024,8 @@ struct vm_struct *alloc_vm_area(size_t size)
* This ensures that page tables are constructed for this region
* of kernel virtual address space and mapped into init_mm.
*/
- if (apply_to_page_range(&init_mm, (unsigned long)area->addr,
- area->size, f, NULL)) {
+ if (apply_to_page_range_batch(&init_mm, (unsigned long)area->addr,
+ area->size, f, NULL)) {
free_vm_area(area);
return NULL;
}
--
1.7.4
From e35361f09bf25ecb5ba6877e44319de315b76f5e Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Date: Wed, 1 Dec 2010 15:44:04 -0800
Subject: [PATCH 143/197] xen/mmu: use apply_to_page_range_batch() in xen_remap_domain_mfn_range()
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
arch/x86/xen/mmu.c | 19 ++++++++++++-------
1 files changed, 12 insertions(+), 7 deletions(-)
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 38ba804..25da278 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -2292,14 +2292,19 @@ struct remap_data {
struct mmu_update *mmu_update;
};
-static int remap_area_mfn_pte_fn(pte_t *ptep, unsigned long addr, void *data)
+static int remap_area_mfn_pte_fn(pte_t *ptep, unsigned count,
+ unsigned long addr, void *data)
{
struct remap_data *rmd = data;
- pte_t pte = pte_mkspecial(pfn_pte(rmd->mfn++, rmd->prot));
- rmd->mmu_update->ptr = virt_to_machine(ptep).maddr;
- rmd->mmu_update->val = pte_val_ma(pte);
- rmd->mmu_update++;
+ while (count--) {
+ pte_t pte = pte_mkspecial(pfn_pte(rmd->mfn++, rmd->prot));
+
+ rmd->mmu_update->ptr = virt_to_machine(ptep).maddr;
+ rmd->mmu_update->val = pte_val_ma(pte);
+ rmd->mmu_update++;
+ ptep++;
+ }
return 0;
}
@@ -2328,8 +2333,8 @@ int xen_remap_domain_mfn_range(struct vm_area_struct *vma,
range = (unsigned long)batch << PAGE_SHIFT;
rmd.mmu_update = mmu_update;
- err = apply_to_page_range(vma->vm_mm, addr, range,
- remap_area_mfn_pte_fn, &rmd);
+ err = apply_to_page_range_batch(vma->vm_mm, addr, range,
+ remap_area_mfn_pte_fn, &rmd);
if (err)
goto out;
--
1.7.4
From 02533b01d70f7cbbe3cf47de3f27740ab334a11f Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Date: Wed, 1 Dec 2010 15:50:12 -0800
Subject: [PATCH 144/197] xen/grant-table: use apply_to_page_range_batch()
No need to call the callback per-pte.
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
arch/x86/xen/grant-table.c | 28 ++++++++++++++++++----------
1 files changed, 18 insertions(+), 10 deletions(-)
diff --git a/arch/x86/xen/grant-table.c b/arch/x86/xen/grant-table.c
index 5bf892a..11a8a45 100644
--- a/arch/x86/xen/grant-table.c
+++ b/arch/x86/xen/grant-table.c
@@ -44,19 +44,27 @@
#include <asm/pgtable.h>
-static int map_pte_fn(pte_t *pte, unsigned long addr, void *data)
+static int map_pte_fn(pte_t *pte, unsigned count, unsigned long addr, void *data)
{
unsigned long **frames = (unsigned long **)data;
- set_pte_at(&init_mm, addr, pte, mfn_pte((*frames)[0], PAGE_KERNEL));
- (*frames)++;
+ while (count--) {
+ set_pte_at(&init_mm, addr, pte, mfn_pte((*frames)[0], PAGE_KERNEL));
+ (*frames)++;
+ pte++;
+ addr += PAGE_SIZE;
+ }
return 0;
}
-static int unmap_pte_fn(pte_t *pte, unsigned long addr, void *data)
+static int unmap_pte_fn(pte_t *pte, unsigned count, unsigned long addr, void *data)
{
+ while (count--) {
+ pte_clear(&init_mm, addr, pte);
+ addr += PAGE_SIZE;
+ pte++;
+ }
- set_pte_at(&init_mm, addr, pte, __pte(0));
return 0;
}
@@ -75,15 +83,15 @@ int arch_gnttab_map_shared(unsigned long *frames, unsigned long nr_gframes,
*__shared = shared;
}
- rc = apply_to_page_range(&init_mm, (unsigned long)shared,
- PAGE_SIZE * nr_gframes,
- map_pte_fn, &frames);
+ rc = apply_to_page_range_batch(&init_mm, (unsigned long)shared,
+ PAGE_SIZE * nr_gframes,
+ map_pte_fn, &frames);
return rc;
}
void arch_gnttab_unmap_shared(struct grant_entry *shared,
unsigned long nr_gframes)
{
- apply_to_page_range(&init_mm, (unsigned long)shared,
- PAGE_SIZE * nr_gframes, unmap_pte_fn, NULL);
+ apply_to_page_range_batch(&init_mm, (unsigned long)shared,
+ PAGE_SIZE * nr_gframes, unmap_pte_fn, NULL);
}
--
1.7.4
From cb3172f5566fe75b749b0873deedc42687c39064 Mon Sep 17 00:00:00 2001
From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Date: Wed, 19 Jan 2011 18:41:03 -0500
Subject: [PATCH 145/197] x86/nx: Made .bss be HPAGE_ALIGNED.
This alignment makes the kernel boot under Xen.
---
arch/x86/kernel/vmlinux.lds.S | 2 +-
1 files changed, 1 insertions(+), 1 deletions(-)
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index b34ab80..e37d10f 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -341,7 +341,7 @@ SECTIONS
#endif
/* BSS */
- . = ALIGN(PAGE_SIZE);
+ . = ALIGN(HPAGE_SIZE);
.bss : AT(ADDR(.bss) - LOAD_OFFSET) {
__bss_start = .;
*(.bss..page_aligned)
--
1.7.4
From 8751f3b0fd2ca59c410052d1faecc2297bb91d62 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Date: Mon, 24 Jan 2011 17:25:29 -0800
Subject: [PATCH 146/197] xen/gntdev: remove token argument from find_grant_ptes
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
drivers/xen/gntdev.c | 3 +--
1 files changed, 1 insertions(+), 2 deletions(-)
diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c
index 1e31cdc..2b777c0 100644
--- a/drivers/xen/gntdev.c
+++ b/drivers/xen/gntdev.c
@@ -226,8 +226,7 @@ static void gntdev_free_map(struct grant_map *map)
/* ------------------------------------------------------------------ */
-static int find_grant_ptes(pte_t *pte, pgtable_t token,
- unsigned long addr, void *data)
+static int find_grant_ptes(pte_t *pte, unsigned long addr, void *data)
{
struct grant_map *map = data;
unsigned int pgnr = (addr - map->vma->vm_start) >> PAGE_SHIFT;
--
1.7.4
From 4cbccec60310d7a000352d6da966c049f98bd56a Mon Sep 17 00:00:00 2001
From: Ian Campbell <ian.campbell@citrix.com>
Date: Thu, 27 Jan 2011 15:43:46 +0000
Subject: [PATCH 148/197] xen: netback: handle incoming GSO SKBs which are not CHECKSUM_PARTIAL
The Linux network stack expects all GSO SKBs to have ip_summed ==
CHECKSUM_PARTIAL (which implies that the frame contains a partial
checksum) and the Xen network ring protocol similarly expects an SKB
which has GSO set to also have NETRXF_csum_blank (which also implies a
partial checksum).
However there have been cases of buggy guests which mark a frame as
GSO but do not set csum_blank. If we detect that we a receiving such a
frame (which manifests as ip_summed != PARTIAL && skb_is_gso) then
force the SKB to partial and recalculate the checksum, since we cannot
rely on the peer having done so if they have not set csum_blank.
Add an ethtool stat to track occurrences of this event.
A corresponding fix was made to netfront in e0ce4af920eb028f38bfd680b1d733f4c7a0b7cf.
Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
Cc: Jeremy Fitzhardinge <jeremy@goop.org>
Cc: xen-devel@lists.xensource.com
---
drivers/xen/netback/common.h | 1 +
drivers/xen/netback/interface.c | 9 +++++++-
drivers/xen/netback/netback.c | 43 ++++++++++++++++++++++++++++++++------
3 files changed, 45 insertions(+), 8 deletions(-)
diff --git a/drivers/xen/netback/common.h b/drivers/xen/netback/common.h
index 7e03a46..f660eb5 100644
--- a/drivers/xen/netback/common.h
+++ b/drivers/xen/netback/common.h
@@ -101,6 +101,7 @@ struct xen_netif {
/* Statistics */
int nr_copied_skbs;
+ int rx_gso_checksum_fixup;
/* Miscellaneous private stuff. */
struct list_head list; /* scheduling list */
diff --git a/drivers/xen/netback/interface.c b/drivers/xen/netback/interface.c
index efdc21c..d3af68e 100644
--- a/drivers/xen/netback/interface.c
+++ b/drivers/xen/netback/interface.c
@@ -189,7 +189,14 @@ static const struct netif_stat {
char name[ETH_GSTRING_LEN];
u16 offset;
} netbk_stats[] = {
- { "copied_skbs", offsetof(struct xen_netif, nr_copied_skbs) },
+ {
+ "copied_skbs",
+ offsetof(struct xen_netif, nr_copied_skbs)
+ },
+ {
+ "rx_gso_checksum_fixup",
+ offsetof(struct xen_netif, rx_gso_checksum_fixup)
+ },
};
static int netbk_get_sset_count(struct net_device *dev, int string_set)
diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
index 53b3a0e..8189199 100644
--- a/drivers/xen/netback/netback.c
+++ b/drivers/xen/netback/netback.c
@@ -1222,11 +1222,28 @@ static int netbk_set_skb_gso(struct sk_buff *skb, struct xen_netif_extra_info *g
return 0;
}
-static int skb_checksum_setup(struct sk_buff *skb)
+static int checksum_setup(struct xen_netif *netif, struct sk_buff *skb)
{
struct iphdr *iph;
unsigned char *th;
int err = -EPROTO;
+ int recalculate_partial_csum = 0;
+
+ /*
+ * A GSO SKB must be CHECKSUM_PARTIAL. However some buggy
+ * peers can fail to set NETRXF_csum_blank when sending a GSO
+ * frame. In this case force the SKB to CHECKSUM_PARTIAL and
+ * recalculate the partial checksum.
+ */
+ if (skb->ip_summed != CHECKSUM_PARTIAL && skb_is_gso(skb)) {
+ netif->rx_gso_checksum_fixup++;
+ skb->ip_summed = CHECKSUM_PARTIAL;
+ recalculate_partial_csum = 1;
+ }
+
+ /* A non-CHECKSUM_PARTIAL SKB does not require setup. */
+ if (skb->ip_summed != CHECKSUM_PARTIAL)
+ return 0;
if (skb->protocol != htons(ETH_P_IP))
goto out;
@@ -1240,9 +1257,23 @@ static int skb_checksum_setup(struct sk_buff *skb)
switch (iph->protocol) {
case IPPROTO_TCP:
skb->csum_offset = offsetof(struct tcphdr, check);
+
+ if (recalculate_partial_csum) {
+ struct tcphdr *tcph = (struct tcphdr *)th;
+ tcph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr,
+ skb->len - iph->ihl*4,
+ IPPROTO_TCP, 0);
+ }
break;
case IPPROTO_UDP:
skb->csum_offset = offsetof(struct udphdr, check);
+
+ if (recalculate_partial_csum) {
+ struct udphdr *udph = (struct udphdr *)th;
+ udph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr,
+ skb->len - iph->ihl*4,
+ IPPROTO_UDP, 0);
+ }
break;
default:
if (net_ratelimit())
@@ -1496,12 +1527,10 @@ static void net_tx_submit(struct xen_netbk *netbk)
skb->dev = netif->dev;
skb->protocol = eth_type_trans(skb, skb->dev);
- if (skb->ip_summed == CHECKSUM_PARTIAL) {
- if (skb_checksum_setup(skb)) {
- DPRINTK("Can't setup checksum in net_tx_action\n");
- kfree_skb(skb);
- continue;
- }
+ if (checksum_setup(netif, skb)) {
+ DPRINTK("Can't setup checksum in net_tx_action\n");
+ kfree_skb(skb);
+ continue;
}
if (unlikely(netbk_copy_skb_mode == NETBK_ALWAYS_COPY_SKB) &&
--
1.7.4
From 6838c800b73d62ed7e9565d7ff584d3671ec5dd6 Mon Sep 17 00:00:00 2001
From: Ian Campbell <ian.campbell@citrix.com>
Date: Tue, 18 Jan 2011 11:37:12 +0000
Subject: [PATCH 149/197] xen: netback: rationalise types used in count_skb_slots
Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
---
drivers/xen/netback/netback.c | 10 +++-------
1 files changed, 3 insertions(+), 7 deletions(-)
diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
index 8189199..22c1fa5 100644
--- a/drivers/xen/netback/netback.c
+++ b/drivers/xen/netback/netback.c
@@ -277,14 +277,10 @@ static inline int netbk_queue_full(struct xen_netif *netif)
/* Figure out how many ring slots we're going to need to send @skb to
the guest. */
-static unsigned count_skb_slots(struct sk_buff *skb, struct xen_netif *netif)
+static unsigned int count_skb_slots(struct sk_buff *skb, struct xen_netif *netif)
{
- unsigned count;
- unsigned copy_off;
- unsigned i;
-
- copy_off = 0;
- count = 1;
+ unsigned int count = 1;
+ int i, copy_off = 0;
BUG_ON(offset_in_page(skb->data) + skb_headlen(skb) > MAX_BUFFER_OFFSET);
--
1.7.4
From 6527724a1af638f8d9c96d40d3b180278eb2b0c7 Mon Sep 17 00:00:00 2001
From: Ian Campbell <ian.campbell@citrix.com>
Date: Tue, 18 Jan 2011 11:21:35 +0000
Subject: [PATCH 150/197] xen: netback: refactor logic for moving to a new receive buffer.
Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
---
drivers/xen/netback/netback.c | 80 ++++++++++++++++++++++++-----------------
1 files changed, 47 insertions(+), 33 deletions(-)
diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
index 22c1fa5..909e0ef 100644
--- a/drivers/xen/netback/netback.c
+++ b/drivers/xen/netback/netback.c
@@ -275,8 +275,51 @@ static inline int netbk_queue_full(struct xen_netif *netif)
((netif->rx.rsp_prod_pvt + NET_RX_RING_SIZE - peek) < needed);
}
-/* Figure out how many ring slots we're going to need to send @skb to
- the guest. */
+/*
+ * Returns true if we should start a new receive buffer instead of
+ * adding 'size' bytes to a buffer which currently contains 'offset'
+ * bytes.
+ */
+static bool start_new_rx_buffer(int offset, unsigned long size, int head)
+{
+ /* simple case: we have completely filled the current buffer. */
+ if (offset == MAX_BUFFER_OFFSET)
+ return true;
+
+ /*
+ * complex case: start a fresh buffer if the current frag
+ * would overflow the current buffer but only if:
+ * (i) this frag would fit completely in the next buffer
+ * and (ii) there is already some data in the current buffer
+ * and (iii) this is not the head buffer.
+ *
+ * Where:
+ * - (i) stops us splitting a frag into two copies
+ * unless the frag is too large for a single buffer.
+ * - (ii) stops us from leaving a buffer pointlessly empty.
+ * - (iii) stops us leaving the first buffer
+ * empty. Strictly speaking this is already covered
+ * by (ii) but is explicitly checked because
+ * netfront relies on the first buffer being
+ * non-empty and can crash otherwise.
+ *
+ * This means we will effectively linearise small
+ * frags but do not needlessly split large buffers
+ * into multiple copies tend to give large frags their
+ * own buffers as before.
+ */
+ if ((offset + size > MAX_BUFFER_OFFSET) &&
+ (size <= MAX_BUFFER_OFFSET) && offset && !head)
+ return true;
+
+ return false;
+}
+
+/*
+ * Figure out how many ring slots we're going to need to send @skb to
+ * the guest. This function is essentially a dry run of
+ * netbk_gop_frag_copy.
+ */
static unsigned int count_skb_slots(struct sk_buff *skb, struct xen_netif *netif)
{
unsigned int count = 1;
@@ -295,9 +338,7 @@ static unsigned int count_skb_slots(struct sk_buff *skb, struct xen_netif *netif
while (size > 0) {
BUG_ON(copy_off > MAX_BUFFER_OFFSET);
- /* These checks are the same as in netbk_gop_frag_copy */
- if (copy_off == MAX_BUFFER_OFFSET
- || ((copy_off + size > MAX_BUFFER_OFFSET) && (size <= MAX_BUFFER_OFFSET) && copy_off)) {
+ if (start_new_rx_buffer(copy_off, size, 0)) {
count++;
copy_off = 0;
}
@@ -403,34 +444,7 @@ static void netbk_gop_frag_copy(struct xen_netif *netif,
while (size > 0) {
BUG_ON(npo->copy_off > MAX_BUFFER_OFFSET);
- /*
- * Move to a new receive buffer if:
- *
- * simple case: we have completely filled the current buffer.
- *
- * complex case: the current frag would overflow
- * the current buffer but only if:
- * (i) this frag would fit completely in the next buffer
- * and (ii) there is already some data in the current buffer
- * and (iii) this is not the head buffer.
- *
- * Where:
- * - (i) stops us splitting a frag into two copies
- * unless the frag is too large for a single buffer.
- * - (ii) stops us from leaving a buffer pointlessly empty.
- * - (iii) stops us leaving the first buffer
- * empty. Strictly speaking this is already covered
- * by (ii) but is explicitly checked because
- * netfront relies on the first buffer being
- * non-empty and can crash otherwise.
- *
- * This means we will effectively linearise small
- * frags but do not needlessly split large buffers
- * into multiple copies tend to give large frags their
- * own buffers as before.
- */
- if (npo->copy_off == MAX_BUFFER_OFFSET
- || ((npo->copy_off + size > MAX_BUFFER_OFFSET) && (size <= MAX_BUFFER_OFFSET) && npo->copy_off && !head)) {
+ if (start_new_rx_buffer(npo->copy_off, size, head)) {
struct xen_netif_rx_request *req;
BUG_ON(head); /* Netfront requires there to be some data in the head buffer. */
--
1.7.4
From cee0b1c0ea524ddc735ad04b9e6e548f3f5de42a Mon Sep 17 00:00:00 2001
From: Stephen Tweedie <sct@redhat.com>
Date: Fri, 6 Feb 2009 19:09:47 -0800
Subject: [PATCH 151/197] xen dom0: Add support for the platform_ops hypercall
Minimal changes to get platform ops (renamed dom0_ops on pv_ops) working
on pv_ops builds. Pulls in upstream linux-2.6.18-xen.hg's platform.h
[ Impact: add Xen hypercall definitions ]
Signed-off-by: Stephen Tweedie <sct@redhat.com>
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
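As a rough usage sketch (the command and values here are only an
example), a dom0 caller fills in a struct xen_platform_op and issues the
hypercall; HYPERVISOR_dom0_op() sets the interface version itself:

	struct xen_platform_op op;

	op.cmd = XENPF_settime;
	op.u.settime.secs = 0;
	op.u.settime.nsecs = 0;
	op.u.settime.system_time = 0;

	if (HYPERVISOR_dom0_op(&op))
		printk(KERN_WARNING "XENPF_settime failed\n");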
---
arch/x86/include/asm/xen/hypercall.h | 8 ++
include/xen/interface/platform.h | 222 ++++++++++++++++++++++++++++++++++
include/xen/interface/xen.h | 2 +
3 files changed, 232 insertions(+), 0 deletions(-)
create mode 100644 include/xen/interface/platform.h
diff --git a/arch/x86/include/asm/xen/hypercall.h b/arch/x86/include/asm/xen/hypercall.h
index a3c28ae..3d10d04 100644
--- a/arch/x86/include/asm/xen/hypercall.h
+++ b/arch/x86/include/asm/xen/hypercall.h
@@ -45,6 +45,7 @@
#include <xen/interface/xen.h>
#include <xen/interface/sched.h>
#include <xen/interface/physdev.h>
+#include <xen/interface/platform.h>
/*
* The hypercall asms have to meet several constraints:
@@ -299,6 +300,13 @@ HYPERVISOR_set_timer_op(u64 timeout)
}
static inline int
+HYPERVISOR_dom0_op(struct xen_platform_op *platform_op)
+{
+ platform_op->interface_version = XENPF_INTERFACE_VERSION;
+ return _hypercall1(int, dom0_op, platform_op);
+}
+
+static inline int
HYPERVISOR_set_debugreg(int reg, unsigned long value)
{
return _hypercall2(int, set_debugreg, reg, value);
diff --git a/include/xen/interface/platform.h b/include/xen/interface/platform.h
new file mode 100644
index 0000000..83e4714
--- /dev/null
+++ b/include/xen/interface/platform.h
@@ -0,0 +1,222 @@
+/******************************************************************************
+ * platform.h
+ *
+ * Hardware platform operations. Intended for use by domain-0 kernel.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright (c) 2002-2006, K Fraser
+ */
+
+#ifndef __XEN_PUBLIC_PLATFORM_H__
+#define __XEN_PUBLIC_PLATFORM_H__
+
+#include "xen.h"
+
+#define XENPF_INTERFACE_VERSION 0x03000001
+
+/*
+ * Set clock such that it would read <secs,nsecs> after 00:00:00 UTC,
+ * 1 January, 1970 if the current system time was <system_time>.
+ */
+#define XENPF_settime 17
+struct xenpf_settime {
+ /* IN variables. */
+ uint32_t secs;
+ uint32_t nsecs;
+ uint64_t system_time;
+};
+typedef struct xenpf_settime xenpf_settime_t;
+DEFINE_GUEST_HANDLE_STRUCT(xenpf_settime_t);
+
+/*
+ * Request memory range (@mfn, @mfn+@nr_mfns-1) to have type @type.
+ * On x86, @type is an architecture-defined MTRR memory type.
+ * On success, returns the MTRR that was used (@reg) and a handle that can
+ * be passed to XENPF_DEL_MEMTYPE to accurately tear down the new setting.
+ * (x86-specific).
+ */
+#define XENPF_add_memtype 31
+struct xenpf_add_memtype {
+ /* IN variables. */
+ unsigned long mfn;
+ uint64_t nr_mfns;
+ uint32_t type;
+ /* OUT variables. */
+ uint32_t handle;
+ uint32_t reg;
+};
+typedef struct xenpf_add_memtype xenpf_add_memtype_t;
+DEFINE_GUEST_HANDLE_STRUCT(xenpf_add_memtype_t);
+
+/*
+ * Tear down an existing memory-range type. If @handle is remembered then it
+ * should be passed in to accurately tear down the correct setting (in case
+ * of overlapping memory regions with differing types). If it is not known
+ * then @handle should be set to zero. In all cases @reg must be set.
+ * (x86-specific).
+ */
+#define XENPF_del_memtype 32
+struct xenpf_del_memtype {
+ /* IN variables. */
+ uint32_t handle;
+ uint32_t reg;
+};
+typedef struct xenpf_del_memtype xenpf_del_memtype_t;
+DEFINE_GUEST_HANDLE_STRUCT(xenpf_del_memtype_t);
+
+/* Read current type of an MTRR (x86-specific). */
+#define XENPF_read_memtype 33
+struct xenpf_read_memtype {
+ /* IN variables. */
+ uint32_t reg;
+ /* OUT variables. */
+ unsigned long mfn;
+ uint64_t nr_mfns;
+ uint32_t type;
+};
+typedef struct xenpf_read_memtype xenpf_read_memtype_t;
+DEFINE_GUEST_HANDLE_STRUCT(xenpf_read_memtype_t);
+
+#define XENPF_microcode_update 35
+struct xenpf_microcode_update {
+ /* IN variables. */
+ GUEST_HANDLE(void) data; /* Pointer to microcode data */
+ uint32_t length; /* Length of microcode data. */
+};
+typedef struct xenpf_microcode_update xenpf_microcode_update_t;
+DEFINE_GUEST_HANDLE_STRUCT(xenpf_microcode_update_t);
+
+#define XENPF_platform_quirk 39
+#define QUIRK_NOIRQBALANCING 1 /* Do not restrict IO-APIC RTE targets */
+#define QUIRK_IOAPIC_BAD_REGSEL 2 /* IO-APIC REGSEL forgets its value */
+#define QUIRK_IOAPIC_GOOD_REGSEL 3 /* IO-APIC REGSEL behaves properly */
+struct xenpf_platform_quirk {
+ /* IN variables. */
+ uint32_t quirk_id;
+};
+typedef struct xenpf_platform_quirk xenpf_platform_quirk_t;
+DEFINE_GUEST_HANDLE_STRUCT(xenpf_platform_quirk_t);
+
+#define XENPF_firmware_info 50
+#define XEN_FW_DISK_INFO 1 /* from int 13 AH=08/41/48 */
+#define XEN_FW_DISK_MBR_SIGNATURE 2 /* from MBR offset 0x1b8 */
+#define XEN_FW_VBEDDC_INFO 3 /* from int 10 AX=4f15 */
+struct xenpf_firmware_info {
+ /* IN variables. */
+ uint32_t type;
+ uint32_t index;
+ /* OUT variables. */
+ union {
+ struct {
+ /* Int13, Fn48: Check Extensions Present. */
+ uint8_t device; /* %dl: bios device number */
+ uint8_t version; /* %ah: major version */
+ uint16_t interface_support; /* %cx: support bitmap */
+ /* Int13, Fn08: Legacy Get Device Parameters. */
+ uint16_t legacy_max_cylinder; /* %cl[7:6]:%ch: max cyl # */
+ uint8_t legacy_max_head; /* %dh: max head # */
+ uint8_t legacy_sectors_per_track; /* %cl[5:0]: max sector # */
+ /* Int13, Fn41: Get Device Parameters (as filled into %ds:%esi). */
+ /* NB. First uint16_t of buffer must be set to buffer size. */
+ GUEST_HANDLE(void) edd_params;
+ } disk_info; /* XEN_FW_DISK_INFO */
+ struct {
+ uint8_t device; /* bios device number */
+ uint32_t mbr_signature; /* offset 0x1b8 in mbr */
+ } disk_mbr_signature; /* XEN_FW_DISK_MBR_SIGNATURE */
+ struct {
+ /* Int10, AX=4F15: Get EDID info. */
+ uint8_t capabilities;
+ uint8_t edid_transfer_time;
+ /* must refer to 128-byte buffer */
+ GUEST_HANDLE(uchar) edid;
+ } vbeddc_info; /* XEN_FW_VBEDDC_INFO */
+ } u;
+};
+typedef struct xenpf_firmware_info xenpf_firmware_info_t;
+DEFINE_GUEST_HANDLE_STRUCT(xenpf_firmware_info_t);
+
+#define XENPF_enter_acpi_sleep 51
+struct xenpf_enter_acpi_sleep {
+ /* IN variables */
+ uint16_t pm1a_cnt_val; /* PM1a control value. */
+ uint16_t pm1b_cnt_val; /* PM1b control value. */
+ uint32_t sleep_state; /* Which state to enter (Sn). */
+ uint32_t flags; /* Must be zero. */
+};
+typedef struct xenpf_enter_acpi_sleep xenpf_enter_acpi_sleep_t;
+DEFINE_GUEST_HANDLE_STRUCT(xenpf_enter_acpi_sleep_t);
+
+#define XENPF_change_freq 52
+struct xenpf_change_freq {
+ /* IN variables */
+ uint32_t flags; /* Must be zero. */
+ uint32_t cpu; /* Physical cpu. */
+ uint64_t freq; /* New frequency (Hz). */
+};
+typedef struct xenpf_change_freq xenpf_change_freq_t;
+DEFINE_GUEST_HANDLE_STRUCT(xenpf_change_freq_t);
+
+/*
+ * Get idle times (nanoseconds since boot) for physical CPUs specified in the
+ * @cpumap_bitmap with range [0..@cpumap_nr_cpus-1]. The @idletime array is
+ * indexed by CPU number; only entries with the corresponding @cpumap_bitmap
+ * bit set are written to. On return, @cpumap_bitmap is modified so that any
+ * non-existent CPUs are cleared. Such CPUs have their @idletime array entry
+ * cleared.
+ */
+#define XENPF_getidletime 53
+struct xenpf_getidletime {
+ /* IN/OUT variables */
+ /* IN: CPUs to interrogate; OUT: subset of IN which are present */
+ GUEST_HANDLE(uchar) cpumap_bitmap;
+ /* IN variables */
+ /* Size of cpumap bitmap. */
+ uint32_t cpumap_nr_cpus;
+ /* Must be indexable for every cpu in cpumap_bitmap. */
+ GUEST_HANDLE(uint64_t) idletime;
+ /* OUT variables */
+ /* System time when the idletime snapshots were taken. */
+ uint64_t now;
+};
+typedef struct xenpf_getidletime xenpf_getidletime_t;
+DEFINE_GUEST_HANDLE_STRUCT(xenpf_getidletime_t);
+
+struct xen_platform_op {
+ uint32_t cmd;
+ uint32_t interface_version; /* XENPF_INTERFACE_VERSION */
+ union {
+ struct xenpf_settime settime;
+ struct xenpf_add_memtype add_memtype;
+ struct xenpf_del_memtype del_memtype;
+ struct xenpf_read_memtype read_memtype;
+ struct xenpf_microcode_update microcode;
+ struct xenpf_platform_quirk platform_quirk;
+ struct xenpf_firmware_info firmware_info;
+ struct xenpf_enter_acpi_sleep enter_acpi_sleep;
+ struct xenpf_change_freq change_freq;
+ struct xenpf_getidletime getidletime;
+ uint8_t pad[128];
+ } u;
+};
+typedef struct xen_platform_op xen_platform_op_t;
+DEFINE_GUEST_HANDLE_STRUCT(xen_platform_op_t);
+
+#endif /* __XEN_PUBLIC_PLATFORM_H__ */
diff --git a/include/xen/interface/xen.h b/include/xen/interface/xen.h
index 2befa3e..18b5599 100644
--- a/include/xen/interface/xen.h
+++ b/include/xen/interface/xen.h
@@ -461,6 +461,8 @@ typedef uint8_t xen_domain_handle_t[16];
#define __mk_unsigned_long(x) x ## UL
#define mk_unsigned_long(x) __mk_unsigned_long(x)
+DEFINE_GUEST_HANDLE(uint64_t);
+
#else /* __ASSEMBLY__ */
/* In assembly code we cannot use C numeric constant suffixes. */
--
1.7.4
From 0a49ceea0d032864a72a8744c82c3786a01f34f4 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Date: Fri, 27 Mar 2009 17:39:15 -0700
Subject: [PATCH 152/197] xen: add CPU microcode update driver
Xen does all the hard work for us, including choosing the right update
method for this cpu type and actually doing it for all cpus. We just
need to supply it with the firmware blob.
Because Xen updates all CPUs (and the kernel's virtual cpu numbers have
no fixed relationship with the underlying physical cpus), we only bother
doing anything for cpu "0".
[ Impact: allow CPU microcode update in Xen dom0 ]
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
arch/x86/include/asm/microcode.h | 9 ++
arch/x86/kernel/Makefile | 1 +
arch/x86/kernel/microcode_core.c | 5 +-
arch/x86/kernel/microcode_xen.c | 198 ++++++++++++++++++++++++++++++++++++++
arch/x86/xen/Kconfig | 4 +
5 files changed, 216 insertions(+), 1 deletions(-)
create mode 100644 arch/x86/kernel/microcode_xen.c
diff --git a/arch/x86/include/asm/microcode.h b/arch/x86/include/asm/microcode.h
index 2421507..22677d6 100644
--- a/arch/x86/include/asm/microcode.h
+++ b/arch/x86/include/asm/microcode.h
@@ -61,4 +61,13 @@ static inline struct microcode_ops * __init init_amd_microcode(void)
}
#endif
+#ifdef CONFIG_MICROCODE_XEN
+extern struct microcode_ops * __init init_xen_microcode(void);
+#else
+static inline struct microcode_ops * __init init_xen_microcode(void)
+{
+ return NULL;
+}
+#endif
+
#endif /* _ASM_X86_MICROCODE_H */
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 34244b2..8fd7a4e 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -104,6 +104,7 @@ obj-$(CONFIG_PCSPKR_PLATFORM) += pcspeaker.o
microcode-y := microcode_core.o
microcode-$(CONFIG_MICROCODE_INTEL) += microcode_intel.o
microcode-$(CONFIG_MICROCODE_AMD) += microcode_amd.o
+microcode-$(CONFIG_MICROCODE_XEN) += microcode_xen.o
obj-$(CONFIG_MICROCODE) += microcode.o
obj-$(CONFIG_X86_CHECK_BIOS_CORRUPTION) += check.o
diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c
index 1cca374..6550539 100644
--- a/arch/x86/kernel/microcode_core.c
+++ b/arch/x86/kernel/microcode_core.c
@@ -83,6 +83,7 @@
#include <linux/fs.h>
#include <linux/mm.h>
+#include <xen/xen.h>
#include <asm/microcode.h>
#include <asm/processor.h>
@@ -506,7 +507,9 @@ static int __init microcode_init(void)
struct cpuinfo_x86 *c = &cpu_data(0);
int error;
- if (c->x86_vendor == X86_VENDOR_INTEL)
+ if (xen_pv_domain())
+ microcode_ops = init_xen_microcode();
+ else if (c->x86_vendor == X86_VENDOR_INTEL)
microcode_ops = init_intel_microcode();
else if (c->x86_vendor == X86_VENDOR_AMD)
microcode_ops = init_amd_microcode();
diff --git a/arch/x86/kernel/microcode_xen.c b/arch/x86/kernel/microcode_xen.c
new file mode 100644
index 0000000..9d2a06b
--- /dev/null
+++ b/arch/x86/kernel/microcode_xen.c
@@ -0,0 +1,198 @@
+/*
+ * Xen microcode update driver
+ *
+ * Xen does most of the work here. We just pass the whole blob into
+ * Xen, and it will apply it to all CPUs as appropriate. Xen will
+ * worry about how different CPU models are actually updated.
+ */
+#include <linux/sched.h>
+#include <linux/module.h>
+#include <linux/firmware.h>
+#include <linux/vmalloc.h>
+#include <linux/uaccess.h>
+
+#include <asm/microcode.h>
+
+#include <xen/xen.h>
+#include <xen/interface/platform.h>
+#include <xen/interface/xen.h>
+
+#include <asm/xen/hypercall.h>
+#include <asm/xen/hypervisor.h>
+
+MODULE_DESCRIPTION("Xen microcode update driver");
+MODULE_LICENSE("GPL");
+
+struct xen_microcode {
+ size_t len;
+ char data[0];
+};
+
+static int xen_microcode_update(int cpu)
+{
+ int err;
+ struct xen_platform_op op;
+ struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
+ struct xen_microcode *uc = uci->mc;
+
+ if (uc == NULL || uc->len == 0) {
+ /*
+ * We do all cpus at once, so we don't need to do
+ * other cpus explicitly (besides, these vcpu numbers
+ * have no relationship to underlying physical cpus).
+ */
+ return 0;
+ }
+
+ op.cmd = XENPF_microcode_update;
+ set_xen_guest_handle(op.u.microcode.data, uc->data);
+ op.u.microcode.length = uc->len;
+
+ err = HYPERVISOR_dom0_op(&op);
+
+ if (err != 0)
+ printk(KERN_WARNING "microcode_xen: microcode update failed: %d\n", err);
+
+ return err;
+}
+
+static enum ucode_state xen_request_microcode_fw(int cpu, struct device *device)
+{
+ char name[30];
+ struct cpuinfo_x86 *c = &cpu_data(cpu);
+ const struct firmware *firmware;
+ struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
+ enum ucode_state ret;
+ struct xen_microcode *uc;
+ size_t size;
+ int err;
+
+ switch (c->x86_vendor) {
+ case X86_VENDOR_INTEL:
+ snprintf(name, sizeof(name), "intel-ucode/%02x-%02x-%02x",
+ c->x86, c->x86_model, c->x86_mask);
+ break;
+
+ case X86_VENDOR_AMD:
+ snprintf(name, sizeof(name), "amd-ucode/microcode_amd.bin");
+ break;
+
+ default:
+ return UCODE_NFOUND;
+ }
+
+ err = request_firmware(&firmware, name, device);
+ if (err) {
+ pr_debug("microcode: data file %s load failed\n", name);
+ return UCODE_NFOUND;
+ }
+
+ /*
+ * Only bother getting real firmware for cpu 0; the others get
+ * dummy placeholders.
+ */
+ if (cpu == 0)
+ size = firmware->size;
+ else
+ size = 0;
+
+ if (uci->mc != NULL) {
+ vfree(uci->mc);
+ uci->mc = NULL;
+ }
+
+ ret = UCODE_ERROR;
+ uc = vmalloc(sizeof(*uc) + size);
+ if (uc == NULL)
+ goto out;
+
+ ret = UCODE_OK;
+ uc->len = size;
+ memcpy(uc->data, firmware->data, uc->len);
+
+ uci->mc = uc;
+
+out:
+ release_firmware(firmware);
+
+ return ret;
+}
+
+static enum ucode_state xen_request_microcode_user(int cpu,
+ const void __user *buf, size_t size)
+{
+ struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
+ struct xen_microcode *uc;
+ enum ucode_state ret;
+ size_t unread;
+
+ if (cpu != 0) {
+ /* No real firmware for non-zero cpus; just store a
+ placeholder */
+ size = 0;
+ }
+
+ if (uci->mc != NULL) {
+ vfree(uci->mc);
+ uci->mc = NULL;
+ }
+
+ ret = UCODE_ERROR;
+ uc = vmalloc(sizeof(*uc) + size);
+ if (uc == NULL)
+ goto out;
+
+ uc->len = size;
+
+ ret = UCODE_NFOUND;
+
+ unread = copy_from_user(uc->data, buf, size);
+
+ if (unread != 0) {
+ printk(KERN_WARNING "failed to read %zd of %zd bytes at %p -> %p\n",
+ unread, size, buf, uc->data);
+ goto out;
+ }
+
+ ret = UCODE_OK;
+
+out:
+ if (ret == UCODE_OK)
+ uci->mc = uc;
+ else
+ vfree(uc);
+
+ return ret;
+}
+
+static void xen_microcode_fini_cpu(int cpu)
+{
+ struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
+
+ vfree(uci->mc);
+ uci->mc = NULL;
+}
+
+static int xen_collect_cpu_info(int cpu, struct cpu_signature *sig)
+{
+ sig->sig = 0;
+ sig->pf = 0;
+ sig->rev = 0;
+
+ return 0;
+}
+
+static struct microcode_ops microcode_xen_ops = {
+ .request_microcode_user = xen_request_microcode_user,
+ .request_microcode_fw = xen_request_microcode_fw,
+ .collect_cpu_info = xen_collect_cpu_info,
+ .apply_microcode = xen_microcode_update,
+ .microcode_fini_cpu = xen_microcode_fini_cpu,
+};
+
+struct microcode_ops * __init init_xen_microcode(void)
+{
+ if (!xen_initial_domain())
+ return NULL;
+ return &microcode_xen_ops;
+}
diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig
index 5b54892..384e0a5 100644
--- a/arch/x86/xen/Kconfig
+++ b/arch/x86/xen/Kconfig
@@ -48,3 +48,7 @@ config XEN_DEBUG_FS
help
Enable statistics output and various tuning options in debugfs.
Enabling this option may incur a significant performance overhead.
+
+config MICROCODE_XEN
+ def_bool y
+ depends on XEN_DOM0 && MICROCODE
\ No newline at end of file
--
1.7.4
From f9f91b9ffd3b53f19510e3a2079f9b9c1adbff0b Mon Sep 17 00:00:00 2001
From: Ian Campbell <ian.campbell@citrix.com>
Date: Wed, 2 Feb 2011 11:12:39 +0000
Subject: [PATCH 153/197] xen: netback: refactor code to get next rx buffer into own function.
Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
---
drivers/xen/netback/netback.c | 37 ++++++++++++++++++++++++++-----------
1 files changed, 26 insertions(+), 11 deletions(-)
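[ Note: illustrative excerpt, not an extra hunk. After this refactor the
  buffer-overflow path in netbk_gop_frag_copy() reduces to

	BUG_ON(head); /* netfront requires some data in the head buffer */
	meta = get_next_rx_buffer(netif, npo);

  with the ring-request consumption and the copy_off/copy_gref reset folded
  into the new helper. ]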
diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
index 909e0ef..a8ee1c2 100644
--- a/drivers/xen/netback/netback.c
+++ b/drivers/xen/netback/netback.c
@@ -418,6 +418,25 @@ struct netrx_pending_operations {
grant_ref_t copy_gref;
};
+static struct netbk_rx_meta *get_next_rx_buffer(struct xen_netif *netif,
+ struct netrx_pending_operations *npo)
+{
+ struct netbk_rx_meta *meta;
+ struct xen_netif_rx_request *req;
+
+ req = RING_GET_REQUEST(&netif->rx, netif->rx.req_cons++);
+
+ meta = npo->meta + npo->meta_prod++;
+ meta->gso_size = 0;
+ meta->size = 0;
+ meta->id = req->id;
+
+ npo->copy_off = 0;
+ npo->copy_gref = req->gref;
+
+ return meta;
+}
+
/* Set up the grant operations for this fragment. If it's a flipping
interface, we also set up the unmap request from here. */
@@ -445,17 +464,13 @@ static void netbk_gop_frag_copy(struct xen_netif *netif,
BUG_ON(npo->copy_off > MAX_BUFFER_OFFSET);
if (start_new_rx_buffer(npo->copy_off, size, head)) {
- struct xen_netif_rx_request *req;
-
- BUG_ON(head); /* Netfront requires there to be some data in the head buffer. */
- /* Overflowed this request, go to the next one */
- req = RING_GET_REQUEST(&netif->rx, netif->rx.req_cons++);
- meta = npo->meta + npo->meta_prod++;
- meta->gso_size = 0;
- meta->size = 0;
- meta->id = req->id;
- npo->copy_off = 0;
- npo->copy_gref = req->gref;
+ /*
+ * Netfront requires there to be some data in the head
+ * buffer.
+ */
+ BUG_ON(head);
+
+ meta = get_next_rx_buffer(netif, npo);
}
bytes = size;
--
1.7.4
From e7a721ae14ba202aa837ee8f3b8d1c990cf13766 Mon Sep 17 00:00:00 2001
From: Ian Campbell <ian.campbell@citrix.com>
Date: Wed, 2 Feb 2011 11:14:17 +0000
Subject: [PATCH 154/197] xen: netback: simplify use of netbk_add_frag_responses
Move all the logic into the function instead of having some in the caller.
Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
---
drivers/xen/netback/netback.c | 14 +++++++++-----
1 files changed, 9 insertions(+), 5 deletions(-)
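[ Note: illustrative, condensed from the net_rx_action() hunk below, not an
  extra hunk. The change to the call site is:

	/* before: the caller skipped the single-slot case and subtracted the head */
	if (sco->meta_slots_used > 1)
		netbk_add_frag_responses(netif, status,
					 netbk->meta + npo.meta_cons + 1,
					 sco->meta_slots_used - 1);

	/* after: the guard and the "minus one for the head" live in the callee */
	netbk_add_frag_responses(netif, status,
				 netbk->meta + npo.meta_cons + 1,
				 sco->meta_slots_used);
  ]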
diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
index a8ee1c2..cfe7931 100644
--- a/drivers/xen/netback/netback.c
+++ b/drivers/xen/netback/netback.c
@@ -599,6 +599,12 @@ static void netbk_add_frag_responses(struct xen_netif *netif, int status,
int i;
unsigned long offset;
+ /* No fragments used */
+ if (nr_meta_slots <= 1)
+ return;
+
+ nr_meta_slots--;
+
for (i = 0; i < nr_meta_slots; i++) {
int flags;
if (i == nr_meta_slots - 1)
@@ -727,11 +733,9 @@ static void net_rx_action(unsigned long data)
gso->flags = 0;
}
- if (sco->meta_slots_used > 1) {
- netbk_add_frag_responses(netif, status,
- netbk->meta + npo.meta_cons + 1,
- sco->meta_slots_used - 1);
- }
+ netbk_add_frag_responses(netif, status,
+ netbk->meta + npo.meta_cons + 1,
+ sco->meta_slots_used);
RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&netif->rx, ret);
irq = netif->irq;
--
1.7.4
From 2a7ba9eab324f4b00ec70d2e163f7cd67e1c4241 Mon Sep 17 00:00:00 2001
From: Ian Campbell <ian.campbell@citrix.com>
Date: Fri, 24 Dec 2010 13:37:04 +0000
Subject: [PATCH 155/197] xen: netback: cleanup coding style
Fix checkpatch.pl errors plus a manual sweep. This includes:
- remove incorrect and unnecessary filenames from comment headers.
- do not include <linux/version.h>.
Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
---
drivers/xen/netback/common.h | 57 ++----------------------
drivers/xen/netback/interface.c | 18 +++----
drivers/xen/netback/netback.c | 93 ++++++++++++++++++++-------------------
drivers/xen/netback/xenbus.c | 65 ++++++++++++---------------
4 files changed, 89 insertions(+), 144 deletions(-)
diff --git a/drivers/xen/netback/common.h b/drivers/xen/netback/common.h
index f660eb5..a2455a0 100644
--- a/drivers/xen/netback/common.h
+++ b/drivers/xen/netback/common.h
@@ -1,6 +1,4 @@
-/******************************************************************************
- * arch/xen/drivers/netif/backend/common.h
- *
+/*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License version 2
* as published by the Free Software Foundation; or, when distributed
@@ -29,19 +27,18 @@
#ifndef __NETIF__BACKEND__COMMON_H__
#define __NETIF__BACKEND__COMMON_H__
-#include <linux/version.h>
#include <linux/module.h>
#include <linux/interrupt.h>
#include <linux/slab.h>
#include <linux/ip.h>
#include <linux/in.h>
+#include <linux/io.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/wait.h>
#include <linux/sched.h>
#include <xen/interface/io/netif.h>
-#include <asm/io.h>
#include <asm/pgalloc.h>
#include <xen/interface/grant_table.h>
#include <xen/grant_table.h>
@@ -49,7 +46,7 @@
#define DPRINTK(_f, _a...) \
pr_debug("(file=%s, line=%d) " _f, \
- __FILE__ , __LINE__ , ## _a )
+ __FILE__ , __LINE__ , ## _a)
#define IPRINTK(fmt, args...) \
printk(KERN_INFO "xen_net: " fmt, ##args)
#define WPRINTK(fmt, args...) \
@@ -132,66 +129,22 @@ enum {
extern int netbk_copy_skb_mode;
-/* Function pointers into netback accelerator plugin modules */
-struct netback_accel_hooks {
- struct module *owner;
- int (*probe)(struct xenbus_device *dev);
- int (*remove)(struct xenbus_device *dev);
-};
-
-/* Structure to track the state of a netback accelerator plugin */
-struct netback_accelerator {
- struct list_head link;
- int id;
- char *eth_name;
- atomic_t use_count;
- struct netback_accel_hooks *hooks;
-};
-
struct backend_info {
struct xenbus_device *dev;
struct xen_netif *netif;
enum xenbus_state frontend_state;
struct xenbus_watch hotplug_status_watch;
int have_hotplug_status_watch:1;
-
- /* State relating to the netback accelerator */
- void *netback_accel_priv;
- /* The accelerator that this backend is currently using */
- struct netback_accelerator *accelerator;
};
-#define NETBACK_ACCEL_VERSION 0x00010001
-
-/*
- * Connect an accelerator plugin module to netback. Returns zero on
- * success, < 0 on error, > 0 (with highest version number supported)
- * if version mismatch.
- */
-extern int netback_connect_accelerator(unsigned version,
- int id, const char *eth_name,
- struct netback_accel_hooks *hooks);
-/* Disconnect a previously connected accelerator plugin module */
-extern void netback_disconnect_accelerator(int id, const char *eth_name);
-
-
-extern
-void netback_probe_accelerators(struct backend_info *be,
- struct xenbus_device *dev);
-extern
-void netback_remove_accelerators(struct backend_info *be,
- struct xenbus_device *dev);
-extern
-void netif_accel_init(void);
-
-
#define NET_TX_RING_SIZE __RING_SIZE((struct xen_netif_tx_sring *)0, PAGE_SIZE)
#define NET_RX_RING_SIZE __RING_SIZE((struct xen_netif_rx_sring *)0, PAGE_SIZE)
void netif_disconnect(struct xen_netif *netif);
void netif_set_features(struct xen_netif *netif);
-struct xen_netif *netif_alloc(struct device *parent, domid_t domid, unsigned int handle);
+struct xen_netif *netif_alloc(struct device *parent, domid_t domid,
+ unsigned int handle);
int netif_map(struct xen_netif *netif, unsigned long tx_ring_ref,
unsigned long rx_ring_ref, unsigned int evtchn);
diff --git a/drivers/xen/netback/interface.c b/drivers/xen/netback/interface.c
index d3af68e..4622653 100644
--- a/drivers/xen/netback/interface.c
+++ b/drivers/xen/netback/interface.c
@@ -1,6 +1,4 @@
-/******************************************************************************
- * arch/xen/drivers/netif/backend/interface.c
- *
+/*
* Network-device interface management.
*
* Copyright (c) 2004-2005, Keir Fraser
@@ -232,8 +230,7 @@ static void netbk_get_strings(struct net_device *dev, u32 stringset, u8 * data)
}
}
-static struct ethtool_ops network_ethtool_ops =
-{
+static struct ethtool_ops network_ethtool_ops = {
.get_drvinfo = netbk_get_drvinfo,
.get_tx_csum = ethtool_op_get_tx_csum,
@@ -249,8 +246,7 @@ static struct ethtool_ops network_ethtool_ops =
.get_strings = netbk_get_strings,
};
-static struct net_device_ops netback_ops =
-{
+static struct net_device_ops netback_ops = {
.ndo_start_xmit = netif_be_start_xmit,
.ndo_get_stats = netif_be_get_stats,
.ndo_open = net_open,
@@ -258,7 +254,8 @@ static struct net_device_ops netback_ops =
.ndo_change_mtu = netbk_change_mtu,
};
-struct xen_netif *netif_alloc(struct device *parent, domid_t domid, unsigned int handle)
+struct xen_netif *netif_alloc(struct device *parent, domid_t domid,
+ unsigned int handle)
{
int err = 0;
struct net_device *dev;
@@ -323,8 +320,9 @@ struct xen_netif *netif_alloc(struct device *parent, domid_t domid, unsigned int
return netif;
}
-static int map_frontend_pages(
- struct xen_netif *netif, grant_ref_t tx_ring_ref, grant_ref_t rx_ring_ref)
+static int map_frontend_pages(struct xen_netif *netif,
+ grant_ref_t tx_ring_ref,
+ grant_ref_t rx_ring_ref)
{
struct gnttab_map_grant_ref op;
diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
index cfe7931..d4aa8ac 100644
--- a/drivers/xen/netback/netback.c
+++ b/drivers/xen/netback/netback.c
@@ -1,11 +1,9 @@
-/******************************************************************************
- * drivers/xen/netback/netback.c
- *
+/*
* Back-end of the driver for virtual network devices. This portion of the
* driver exports a 'unified' network-device interface that can be accessed
* by any operating system that implements a compatible front end. A
* reference front-end implementation can be found in:
- * drivers/xen/netfront/netfront.c
+ * drivers/net/xen-netfront.c
*
* Copyright (c) 2002-2005, K A Fraser
*
@@ -82,8 +80,8 @@ static inline unsigned long idx_to_kaddr(struct xen_netbk *netbk,
}
/* extra field used in struct page */
-static inline void netif_set_page_ext(struct page *pg, unsigned int group,
- unsigned int idx)
+static inline void netif_set_page_ext(struct page *pg,
+ unsigned int group, unsigned int idx)
{
union page_ext ext = { .e = { .group = group + 1, .idx = idx } };
@@ -91,7 +89,8 @@ static inline void netif_set_page_ext(struct page *pg, unsigned int group,
pg->mapping = ext.mapping;
}
-static inline int netif_get_page_ext(struct page *pg, unsigned int *_group, unsigned int *_idx)
+static int netif_get_page_ext(struct page *pg,
+ unsigned int *_group, unsigned int *_idx)
{
union page_ext ext = { .mapping = pg->mapping };
struct xen_netbk *netbk;
@@ -325,7 +324,7 @@ static unsigned int count_skb_slots(struct sk_buff *skb, struct xen_netif *netif
unsigned int count = 1;
int i, copy_off = 0;
- BUG_ON(offset_in_page(skb->data) + skb_headlen(skb) > MAX_BUFFER_OFFSET);
+ BUG_ON(offset_in_page(skb->data)+skb_headlen(skb) > MAX_BUFFER_OFFSET);
copy_off = skb_headlen(skb);
@@ -376,7 +375,7 @@ int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev)
*/
if ((skb_headlen(skb) + offset_in_page(skb->data)) >= PAGE_SIZE) {
struct sk_buff *nskb = netbk_copy_skb(skb);
- if ( unlikely(nskb == NULL) )
+ if (unlikely(nskb == NULL))
goto drop;
/* Copy only the header fields we use in this driver. */
nskb->dev = skb->dev;
@@ -385,8 +384,7 @@ int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev)
skb = nskb;
}
- /* Reserve ring slots for the worst-case number of
- * fragments. */
+ /* Reserve ring slots for the worst-case number of fragments. */
netif->rx_req_cons_peek += count_skb_slots(skb, netif);
netif_get(netif);
@@ -437,9 +435,10 @@ static struct netbk_rx_meta *get_next_rx_buffer(struct xen_netif *netif,
return meta;
}
-/* Set up the grant operations for this fragment. If it's a flipping
- interface, we also set up the unmap request from here. */
-
+/*
+ * Set up the grant operations for this fragment. If it's a flipping
+ * interface, we also set up the unmap request from here.
+ */
static void netbk_gop_frag_copy(struct xen_netif *netif,
struct netrx_pending_operations *npo,
struct page *page, unsigned long size,
@@ -450,7 +449,7 @@ static void netbk_gop_frag_copy(struct xen_netif *netif,
/*
* These variables a used iff netif_get_page_ext returns true,
* in which case they are guaranteed to be initialized.
- */
+ */
unsigned int uninitialized_var(group), uninitialized_var(idx);
int foreign = netif_get_page_ext(page, &group, &idx);
unsigned long bytes;
@@ -489,8 +488,9 @@ static void netbk_gop_frag_copy(struct xen_netif *netif,
copy_gop->source.u.ref = src_pend->req.gref;
copy_gop->flags |= GNTCOPY_source_gref;
} else {
+ void *vaddr = page_address(page);
copy_gop->source.domid = DOMID_SELF;
- copy_gop->source.u.gmfn = virt_to_mfn(page_address(page));
+ copy_gop->source.u.gmfn = virt_to_mfn(vaddr);
}
copy_gop->source.offset = offset;
copy_gop->dest.domid = netif->domid;
@@ -504,17 +504,22 @@ static void netbk_gop_frag_copy(struct xen_netif *netif,
offset += bytes;
size -= bytes;
- head = 0; /* Must be something in this buffer now */
+ head = 0; /* There must be something in this buffer now. */
}
}
-/* Prepare an SKB to be transmitted to the frontend. This is
- responsible for allocating grant operations, meta structures, etc.
- It returns the number of meta structures consumed. The number of
- ring slots used is always equal to the number of meta slots used
- plus the number of GSO descriptors used. Currently, we use either
- zero GSO descriptors (for non-GSO packets) or one descriptor (for
- frontend-side LRO). */
+/*
+ * Prepare an SKB to be transmitted to the frontend.
+ *
+ * This function is responsible for allocating grant operations, meta
+ * structures, etc.
+ *
+ * It returns the number of meta structures consumed. The number of
+ * ring slots used is always equal to the number of meta slots used
+ * plus the number of GSO descriptors used. Currently, we use either
+ * zero GSO descriptors (for non-GSO packets) or one descriptor (for
+ * frontend-side LRO).
+ */
static int netbk_gop_skb(struct sk_buff *skb,
struct netrx_pending_operations *npo)
{
@@ -569,10 +574,12 @@ static int netbk_gop_skb(struct sk_buff *skb,
return npo->meta_prod - old_meta_prod;
}
-/* This is a twin to netbk_gop_skb. Assume that netbk_gop_skb was
- used to set up the operations on the top of
- netrx_pending_operations, which have since been done. Check that
- they didn't give any errors and advance over them. */
+/*
+ * This is a twin to netbk_gop_skb. Assume that netbk_gop_skb was
+ * used to set up the operations on the top of
+ * netrx_pending_operations, which have since been done. Check that
+ * they didn't give any errors and advance over them.
+ */
static int netbk_check_gop(int nr_meta_slots, domid_t domid,
struct netrx_pending_operations *npo)
{
@@ -906,9 +913,7 @@ static inline void net_tx_action_dealloc(struct xen_netbk *netbk)
dc = netbk->dealloc_cons;
gop = netbk->tx_unmap_ops;
- /*
- * Free up any grants we have finished using
- */
+ /* Free up any grants we have finished using. */
do {
dp = netbk->dealloc_prod;
@@ -1018,7 +1023,8 @@ static void netbk_tx_err(struct xen_netif *netif,
static int netbk_count_requests(struct xen_netif *netif,
struct xen_netif_tx_request *first,
- struct xen_netif_tx_request *txp, int work_to_do)
+ struct xen_netif_tx_request *txp,
+ int work_to_do)
{
RING_IDX cons = netif->tx.req_cons;
int frags = 0;
@@ -1058,10 +1064,10 @@ static int netbk_count_requests(struct xen_netif *netif,
}
static struct gnttab_map_grant_ref *netbk_get_requests(struct xen_netbk *netbk,
- struct xen_netif *netif,
- struct sk_buff *skb,
- struct xen_netif_tx_request *txp,
- struct gnttab_map_grant_ref *mop)
+ struct xen_netif *netif,
+ struct sk_buff *skb,
+ struct xen_netif_tx_request *txp,
+ struct gnttab_map_grant_ref *mop)
{
struct skb_shared_info *shinfo = skb_shinfo(skb);
skb_frag_t *frags = shinfo->frags;
@@ -1200,7 +1206,8 @@ static void netbk_fill_frags(struct xen_netbk *netbk, struct sk_buff *skb)
}
}
-int netbk_get_extras(struct xen_netif *netif, struct xen_netif_extra_info *extras,
+int netbk_get_extras(struct xen_netif *netif,
+ struct xen_netif_extra_info *extras,
int work_to_do)
{
struct xen_netif_extra_info extra;
@@ -1228,7 +1235,8 @@ int netbk_get_extras(struct xen_netif *netif, struct xen_netif_extra_info *extra
return work_to_do;
}
-static int netbk_set_skb_gso(struct sk_buff *skb, struct xen_netif_extra_info *gso)
+static int netbk_set_skb_gso(struct sk_buff *skb,
+ struct xen_netif_extra_info *gso)
{
if (!gso->u.gso.size) {
DPRINTK("GSO size must not be zero.\n");
@@ -1365,7 +1373,7 @@ static unsigned net_tx_build_mops(struct xen_netbk *netbk)
struct xen_netif *netif;
struct xen_netif_tx_request txreq;
struct xen_netif_tx_request txfrags[MAX_SKB_FRAGS];
- struct xen_netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX - 1];
+ struct xen_netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX-1];
u16 pending_idx;
RING_IDX idx;
int work_to_do;
@@ -1427,7 +1435,7 @@ static unsigned net_tx_build_mops(struct xen_netbk *netbk)
if (unlikely((txreq.offset + txreq.size) > PAGE_SIZE)) {
DPRINTK("txreq.offset: %x, size: %u, end: %lu\n",
txreq.offset, txreq.size,
- (txreq.offset &~PAGE_MASK) + txreq.size);
+ (txreq.offset&~PAGE_MASK) + txreq.size);
netbk_tx_err(netif, &txreq, idx);
continue;
}
@@ -1807,9 +1815,6 @@ static int __init netback_init(void)
}
memset(xen_netbk, 0, sizeof(struct xen_netbk) * xen_netbk_group_nr);
- /* We can increase reservation by this much in net_rx_action(). */
-// balloon_update_driver_allowance(NET_RX_RING_SIZE);
-
for (group = 0; group < xen_netbk_group_nr; group++) {
struct xen_netbk *netbk = &xen_netbk[group];
skb_queue_head_init(&netbk->rx_queue);
@@ -1894,8 +1899,6 @@ static int __init netback_init(void)
netbk_copy_skb_mode = NETBK_DELAYED_COPY_SKB;
}
- //netif_accel_init();
-
rc = netif_xenbus_init();
if (rc)
goto failed_init;
diff --git a/drivers/xen/netback/xenbus.c b/drivers/xen/netback/xenbus.c
index 1fec65a..dd44341 100644
--- a/drivers/xen/netback/xenbus.c
+++ b/drivers/xen/netback/xenbus.c
@@ -1,20 +1,22 @@
-/* Xenbus code for netif backend
- Copyright (C) 2005 Rusty Russell <rusty@rustcorp.com.au>
- Copyright (C) 2005 XenSource Ltd
-
- This program is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 2 of the License, or
- (at your option) any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+/*
+ * Xenbus code for netif backend
+ *
+ * Copyright (C) 2005 Rusty Russell <rusty@rustcorp.com.au>
+ * Copyright (C) 2005 XenSource Ltd
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include <stdarg.h>
@@ -22,13 +24,6 @@
#include <xen/xenbus.h>
#include "common.h"
-#if 0
-#undef DPRINTK
-#define DPRINTK(fmt, args...) \
- printk("netback/xenbus (%s:%d) " fmt ".\n", __FUNCTION__, __LINE__, ##args)
-#endif
-
-
static int connect_rings(struct backend_info *);
static void connect(struct backend_info *);
static void backend_create_netif(struct backend_info *be);
@@ -36,9 +31,7 @@ static void unregister_hotplug_status_watch(struct backend_info *be);
static int netback_remove(struct xenbus_device *dev)
{
- struct backend_info *be = dev_get_drvdata(&dev->dev);
-
- //netback_remove_accelerators(be, dev);
+ struct backend_info *be = dev_get_drvdata(&dev->dev);
unregister_hotplug_status_watch(be);
if (be->netif) {
@@ -126,8 +119,6 @@ static int netback_probe(struct xenbus_device *dev,
goto fail;
}
- //netback_probe_accelerators(be, dev);
-
err = xenbus_switch_state(dev, XenbusStateInitWait);
if (err)
goto fail;
@@ -147,12 +138,13 @@ fail:
}
-/**
+/*
* Handle the creation of the hotplug script environment. We add the script
* and vif variables to the environment, for the benefit of the vif-* hotplug
* scripts.
*/
-static int netback_uevent(struct xenbus_device *xdev, struct kobj_uevent_env *env)
+static int netback_uevent(struct xenbus_device *xdev,
+ struct kobj_uevent_env *env)
{
struct backend_info *be = dev_get_drvdata(&xdev->dev);
char *val;
@@ -164,8 +156,7 @@ static int netback_uevent(struct xenbus_device *xdev, struct kobj_uevent_env *en
int err = PTR_ERR(val);
xenbus_dev_fatal(xdev, err, "reading script");
return err;
- }
- else {
+ } else {
if (add_uevent_var(env, "script=%s", val)) {
kfree(val);
return -ENOMEM;
@@ -173,10 +164,10 @@ static int netback_uevent(struct xenbus_device *xdev, struct kobj_uevent_env *en
kfree(val);
}
- if (be && be->netif && add_uevent_var(env, "vif=%s", be->netif->dev->name))
- return -ENOMEM;
+ if (!be || !be->netif)
+ return 0;
- return 0;
+ return add_uevent_var(env, "vif=%s", be->netif->dev->name);
}
@@ -234,7 +225,7 @@ static void frontend_changed(struct xenbus_device *dev,
case XenbusStateInitialising:
if (dev->state == XenbusStateClosed) {
printk(KERN_INFO "%s: %s: prepare for reconnect\n",
- __FUNCTION__, dev->nodename);
+ __func__, dev->nodename);
xenbus_switch_state(dev, XenbusStateInitWait);
}
break;
--
1.7.4
From 160279b82baaeafebd73c89eefbc8684c0dbef0c Mon Sep 17 00:00:00 2001
From: Ian Campbell <ian.campbell@citrix.com>
Date: Wed, 19 Jan 2011 10:51:45 +0000
Subject: [PATCH 156/197] xen: netback: drop private ?PRINTK macros in favour of pr_*
Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
---
drivers/xen/netback/common.h | 10 ++--------
drivers/xen/netback/interface.c | 13 +++++++------
drivers/xen/netback/netback.c | 38 +++++++++++++++++++-------------------
drivers/xen/netback/xenbus.c | 13 +++----------
4 files changed, 31 insertions(+), 43 deletions(-)
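[ Note: illustrative expansion, not an extra hunk. The replacement relies on
  the kernel's pr_fmt() convention; with the definition added to common.h,

	#define pr_fmt(fmt) KBUILD_MODNAME ":%s: " fmt, __func__

  a call such as

	pr_debug("failed");

  behaves, when debug output is enabled, like

	printk(KERN_DEBUG KBUILD_MODNAME ":%s: failed", __func__);

  so messages keep a module/function prefix comparable to what the private
  DPRINTK/IPRINTK/WPRINTK wrappers used to provide. ]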
diff --git a/drivers/xen/netback/common.h b/drivers/xen/netback/common.h
index a2455a0..49dc4cf 100644
--- a/drivers/xen/netback/common.h
+++ b/drivers/xen/netback/common.h
@@ -27,6 +27,8 @@
#ifndef __NETIF__BACKEND__COMMON_H__
#define __NETIF__BACKEND__COMMON_H__
+#define pr_fmt(fmt) KBUILD_MODNAME ":%s: " fmt, __func__
+
#include <linux/module.h>
#include <linux/interrupt.h>
#include <linux/slab.h>
@@ -44,14 +46,6 @@
#include <xen/grant_table.h>
#include <xen/xenbus.h>
-#define DPRINTK(_f, _a...) \
- pr_debug("(file=%s, line=%d) " _f, \
- __FILE__ , __LINE__ , ## _a)
-#define IPRINTK(fmt, args...) \
- printk(KERN_INFO "xen_net: " fmt, ##args)
-#define WPRINTK(fmt, args...) \
- printk(KERN_WARNING "xen_net: " fmt, ##args)
-
struct xen_netif {
/* Unique identifier for this interface. */
domid_t domid;
diff --git a/drivers/xen/netback/interface.c b/drivers/xen/netback/interface.c
index 4622653..b429f8c 100644
--- a/drivers/xen/netback/interface.c
+++ b/drivers/xen/netback/interface.c
@@ -29,6 +29,7 @@
*/
#include "common.h"
+
#include <linux/ethtool.h>
#include <linux/rtnetlink.h>
@@ -265,7 +266,7 @@ struct xen_netif *netif_alloc(struct device *parent, domid_t domid,
snprintf(name, IFNAMSIZ - 1, "vif%u.%u", domid, handle);
dev = alloc_netdev(sizeof(struct xen_netif), name, ether_setup);
if (dev == NULL) {
- DPRINTK("Could not create netif: out of memory\n");
+ pr_debug("Could not allocate netdev\n");
return ERR_PTR(-ENOMEM);
}
@@ -310,13 +311,13 @@ struct xen_netif *netif_alloc(struct device *parent, domid_t domid,
err = register_netdevice(dev);
rtnl_unlock();
if (err) {
- DPRINTK("Could not register new net device %s: err=%d\n",
- dev->name, err);
+ pr_debug("Could not register new net device %s: err=%d\n",
+ dev->name, err);
free_netdev(dev);
return ERR_PTR(err);
}
- DPRINTK("Successfully created netif\n");
+ pr_debug("Successfully created netif\n");
return netif;
}
@@ -333,7 +334,7 @@ static int map_frontend_pages(struct xen_netif *netif,
BUG();
if (op.status) {
- DPRINTK(" Gnttab failure mapping tx_ring_ref!\n");
+ pr_debug("Gnttab failure mapping tx_ring_ref!\n");
return op.status;
}
@@ -353,7 +354,7 @@ static int map_frontend_pages(struct xen_netif *netif,
(unsigned long)netif->tx_comms_area->addr,
GNTMAP_host_map, netif->tx_shmem_handle);
HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &unop, 1);
- DPRINTK(" Gnttab failure mapping rx_ring_ref!\n");
+ pr_debug("Gnttab failure mapping rx_ring_ref!\n");
return op.status;
}
diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
index d4aa8ac..b290525 100644
--- a/drivers/xen/netback/netback.c
+++ b/drivers/xen/netback/netback.c
@@ -590,8 +590,8 @@ static int netbk_check_gop(int nr_meta_slots, domid_t domid,
for (i = 0; i < nr_meta_slots; i++) {
copy_op = npo->copy + npo->copy_cons++;
if (copy_op->status != GNTST_okay) {
- DPRINTK("Bad status %d from copy to DOM%d.\n",
- copy_op->status, domid);
+ pr_debug("Bad status %d from copy to DOM%d.\n",
+ copy_op->status, domid);
status = NETIF_RSP_ERROR;
}
}
@@ -1034,19 +1034,19 @@ static int netbk_count_requests(struct xen_netif *netif,
do {
if (frags >= work_to_do) {
- DPRINTK("Need more frags\n");
+ pr_debug("Need more frags\n");
return -frags;
}
if (unlikely(frags >= MAX_SKB_FRAGS)) {
- DPRINTK("Too many frags\n");
+ pr_debug("Too many frags\n");
return -frags;
}
memcpy(txp, RING_GET_REQUEST(&netif->tx, cons + frags),
sizeof(*txp));
if (txp->size > first->size) {
- DPRINTK("Frags galore\n");
+ pr_debug("Frags galore\n");
return -frags;
}
@@ -1054,8 +1054,8 @@ static int netbk_count_requests(struct xen_netif *netif,
frags++;
if (unlikely((txp->offset + txp->size) > PAGE_SIZE)) {
- DPRINTK("txp->offset: %x, size: %u\n",
- txp->offset, txp->size);
+ pr_debug("txp->offset: %x, size: %u\n",
+ txp->offset, txp->size);
return -frags;
}
} while ((txp++)->flags & NETTXF_more_data);
@@ -1215,7 +1215,7 @@ int netbk_get_extras(struct xen_netif *netif,
do {
if (unlikely(work_to_do-- <= 0)) {
- DPRINTK("Missing extra info\n");
+ pr_debug("Missing extra info\n");
return -EBADR;
}
@@ -1224,7 +1224,7 @@ int netbk_get_extras(struct xen_netif *netif,
if (unlikely(!extra.type ||
extra.type >= XEN_NETIF_EXTRA_TYPE_MAX)) {
netif->tx.req_cons = ++cons;
- DPRINTK("Invalid extra type: %d\n", extra.type);
+ pr_debug("Invalid extra type: %d\n", extra.type);
return -EINVAL;
}
@@ -1239,13 +1239,13 @@ static int netbk_set_skb_gso(struct sk_buff *skb,
struct xen_netif_extra_info *gso)
{
if (!gso->u.gso.size) {
- DPRINTK("GSO size must not be zero.\n");
+ pr_debug("GSO size must not be zero.\n");
return -EINVAL;
}
/* Currently only TCPv4 S.O. is supported. */
if (gso->u.gso.type != XEN_NETIF_GSO_TYPE_TCPV4) {
- DPRINTK("Bad GSO type %d.\n", gso->u.gso.type);
+ pr_debug("Bad GSO type %d.\n", gso->u.gso.type);
return -EINVAL;
}
@@ -1426,16 +1426,16 @@ static unsigned net_tx_build_mops(struct xen_netbk *netbk)
idx += ret;
if (unlikely(txreq.size < ETH_HLEN)) {
- DPRINTK("Bad packet size: %d\n", txreq.size);
+ pr_debug("Bad packet size: %d\n", txreq.size);
netbk_tx_err(netif, &txreq, idx);
continue;
}
/* No crossing a page as the payload mustn't fragment. */
if (unlikely((txreq.offset + txreq.size) > PAGE_SIZE)) {
- DPRINTK("txreq.offset: %x, size: %u, end: %lu\n",
- txreq.offset, txreq.size,
- (txreq.offset&~PAGE_MASK) + txreq.size);
+ pr_debug("txreq.offset: %x, size: %u, end: %lu\n",
+ txreq.offset, txreq.size,
+ (txreq.offset&~PAGE_MASK) + txreq.size);
netbk_tx_err(netif, &txreq, idx);
continue;
}
@@ -1450,7 +1450,7 @@ static unsigned net_tx_build_mops(struct xen_netbk *netbk)
skb = alloc_skb(data_len + NET_SKB_PAD + NET_IP_ALIGN,
GFP_ATOMIC | __GFP_NOWARN);
if (unlikely(skb == NULL)) {
- DPRINTK("Can't allocate a skb in start_xmit.\n");
+ pr_debug("Can't allocate a skb in start_xmit.\n");
netbk_tx_err(netif, &txreq, idx);
break;
}
@@ -1525,7 +1525,7 @@ static void net_tx_submit(struct xen_netbk *netbk)
/* Check the remap error code. */
if (unlikely(netbk_tx_check_mop(netbk, skb, &mop))) {
- DPRINTK("netback grant failed.\n");
+ pr_debug("netback grant failed.\n");
skb_shinfo(skb)->nr_frags = 0;
kfree_skb(skb);
continue;
@@ -1565,14 +1565,14 @@ static void net_tx_submit(struct xen_netbk *netbk)
skb->protocol = eth_type_trans(skb, skb->dev);
if (checksum_setup(netif, skb)) {
- DPRINTK("Can't setup checksum in net_tx_action\n");
+ pr_debug("Can't setup checksum in net_tx_action\n");
kfree_skb(skb);
continue;
}
if (unlikely(netbk_copy_skb_mode == NETBK_ALWAYS_COPY_SKB) &&
unlikely(skb_linearize(skb))) {
- DPRINTK("Can't linearize skb in net_tx_action.\n");
+ pr_debug("Can't linearize skb in net_tx_action.\n");
kfree_skb(skb);
continue;
}
diff --git a/drivers/xen/netback/xenbus.c b/drivers/xen/netback/xenbus.c
index dd44341..867dc25 100644
--- a/drivers/xen/netback/xenbus.c
+++ b/drivers/xen/netback/xenbus.c
@@ -19,9 +19,6 @@
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
-#include <stdarg.h>
-#include <linux/module.h>
-#include <xen/xenbus.h>
#include "common.h"
static int connect_rings(struct backend_info *);
@@ -132,7 +129,7 @@ abort_transaction:
xenbus_transaction_end(xbt, 1);
xenbus_dev_fatal(dev, err, "%s", message);
fail:
- DPRINTK("failed");
+ pr_debug("failed");
netback_remove(dev);
return err;
}
@@ -149,8 +146,6 @@ static int netback_uevent(struct xenbus_device *xdev,
struct backend_info *be = dev_get_drvdata(&xdev->dev);
char *val;
- DPRINTK("netback_uevent");
-
val = xenbus_read(XBT_NIL, xdev->nodename, "script", NULL);
if (IS_ERR(val)) {
int err = PTR_ERR(val);
@@ -217,7 +212,7 @@ static void frontend_changed(struct xenbus_device *dev,
{
struct backend_info *be = dev_get_drvdata(&dev->dev);
- DPRINTK("%s", xenbus_strstate(frontend_state));
+ pr_debug("frontend state %s", xenbus_strstate(frontend_state));
be->frontend_state = frontend_state;
@@ -297,7 +292,7 @@ static void xen_net_read_rate(struct xenbus_device *dev,
return;
fail:
- WPRINTK("Failed to parse network rate limit. Traffic unlimited.\n");
+ pr_warn("Failed to parse network rate limit. Traffic unlimited.\n");
kfree(ratestr);
}
@@ -396,8 +391,6 @@ static int connect_rings(struct backend_info *be)
int err;
int val;
- DPRINTK("");
-
err = xenbus_gather(XBT_NIL, dev->otherend,
"tx-ring-ref", "%lu", &tx_ring_ref,
"rx-ring-ref", "%lu", &rx_ring_ref,
--
1.7.4
From 7ecff493c52ab8ef75b38ec70847161d8e66b804 Mon Sep 17 00:00:00 2001
From: Ian Campbell <ian.campbell@citrix.com>
Date: Tue, 18 Jan 2011 12:54:12 +0000
Subject: [PATCH 157/197] xen: netback: move under drivers/net/xen-netback/
From the kernel's PoV netback is just another network device driver.
Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
---
drivers/net/Kconfig | 7 +
drivers/net/Makefile | 1 +
drivers/net/xen-netback/Makefile | 3 +
drivers/net/xen-netback/common.h | 273 +++++
drivers/net/xen-netback/interface.c | 470 +++++++++
drivers/net/xen-netback/netback.c | 1934 +++++++++++++++++++++++++++++++++++
drivers/net/xen-netback/xenbus.c | 489 +++++++++
drivers/xen/Kconfig | 7 -
drivers/xen/Makefile | 1 -
drivers/xen/netback/Makefile | 3 -
drivers/xen/netback/common.h | 273 -----
drivers/xen/netback/interface.c | 470 ---------
drivers/xen/netback/netback.c | 1934 -----------------------------------
drivers/xen/netback/xenbus.c | 489 ---------
14 files changed, 3177 insertions(+), 3177 deletions(-)
create mode 100644 drivers/net/xen-netback/Makefile
create mode 100644 drivers/net/xen-netback/common.h
create mode 100644 drivers/net/xen-netback/interface.c
create mode 100644 drivers/net/xen-netback/netback.c
create mode 100644 drivers/net/xen-netback/xenbus.c
delete mode 100644 drivers/xen/netback/Makefile
delete mode 100644 drivers/xen/netback/common.h
delete mode 100644 drivers/xen/netback/interface.c
delete mode 100644 drivers/xen/netback/netback.c
delete mode 100644 drivers/xen/netback/xenbus.c
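[ Note: illustrative, not part of the diff. After the move the backend is
  selected from the normal network-device menu; a dom0 kernel config would
  typically carry something like (assuming the existing XEN_BACKEND option is
  already enabled, as the new Kconfig entry depends on it):

	CONFIG_XEN_BACKEND=y
	CONFIG_XEN_NETDEV_BACKEND=m

  The code itself is unchanged; only its location and the Kconfig/Makefile
  wiring move. ]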
diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig
index cbf0635..5b088f5 100644
--- a/drivers/net/Kconfig
+++ b/drivers/net/Kconfig
@@ -2970,6 +2970,13 @@ config XEN_NETDEV_FRONTEND
if you are compiling a kernel for a Xen guest, you almost
certainly want to enable this.
+config XEN_NETDEV_BACKEND
+ tristate "Xen backend network device"
+ depends on XEN_BACKEND
+ help
+ Implement the network backend driver, which passes packets
+ from the guest domain's frontend drivers to the network.
+
config ISERIES_VETH
tristate "iSeries Virtual Ethernet driver support"
depends on PPC_ISERIES
diff --git a/drivers/net/Makefile b/drivers/net/Makefile
index b90738d..145dfd7 100644
--- a/drivers/net/Makefile
+++ b/drivers/net/Makefile
@@ -171,6 +171,7 @@ obj-$(CONFIG_SLIP) += slip.o
obj-$(CONFIG_SLHC) += slhc.o
obj-$(CONFIG_XEN_NETDEV_FRONTEND) += xen-netfront.o
+obj-$(CONFIG_XEN_NETDEV_BACKEND) += xen-netback/
obj-$(CONFIG_DUMMY) += dummy.o
obj-$(CONFIG_IFB) += ifb.o
diff --git a/drivers/net/xen-netback/Makefile b/drivers/net/xen-netback/Makefile
new file mode 100644
index 0000000..e346e81
--- /dev/null
+++ b/drivers/net/xen-netback/Makefile
@@ -0,0 +1,3 @@
+obj-$(CONFIG_XEN_NETDEV_BACKEND) := xen-netback.o
+
+xen-netback-y := netback.o xenbus.o interface.o
diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h
new file mode 100644
index 0000000..2d727a0
--- /dev/null
+++ b/drivers/net/xen-netback/common.h
@@ -0,0 +1,273 @@
+/*
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation; or, when distributed
+ * separately from the Linux kernel or incorporated into other
+ * software packages, subject to the following license:
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef __XEN_NETBACK__COMMON_H__
+#define __XEN_NETBACK__COMMON_H__
+
+#define pr_fmt(fmt) KBUILD_MODNAME ":%s: " fmt, __func__
+
+#include <linux/module.h>
+#include <linux/interrupt.h>
+#include <linux/slab.h>
+#include <linux/ip.h>
+#include <linux/in.h>
+#include <linux/io.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/wait.h>
+#include <linux/sched.h>
+
+#include <xen/interface/io/netif.h>
+#include <asm/pgalloc.h>
+#include <xen/interface/grant_table.h>
+#include <xen/grant_table.h>
+#include <xen/xenbus.h>
+
+struct xen_netif {
+ /* Unique identifier for this interface. */
+ domid_t domid;
+ int group;
+ unsigned int handle;
+
+ u8 fe_dev_addr[6];
+
+ /* Physical parameters of the comms window. */
+ grant_handle_t tx_shmem_handle;
+ grant_ref_t tx_shmem_ref;
+ grant_handle_t rx_shmem_handle;
+ grant_ref_t rx_shmem_ref;
+ unsigned int irq;
+
+ /* The shared rings and indexes. */
+ struct xen_netif_tx_back_ring tx;
+ struct xen_netif_rx_back_ring rx;
+ struct vm_struct *tx_comms_area;
+ struct vm_struct *rx_comms_area;
+
+ /* Flags that must not be set in dev->features */
+ int features_disabled;
+
+ /* Frontend feature information. */
+ u8 can_sg:1;
+ u8 gso:1;
+ u8 gso_prefix:1;
+ u8 csum:1;
+
+ /* Internal feature information. */
+ u8 can_queue:1; /* can queue packets for receiver? */
+
+ /* Allow netif_be_start_xmit() to peek ahead in the rx request
+ * ring. This is a prediction of what rx_req_cons will be once
+ * all queued skbs are put on the ring. */
+ RING_IDX rx_req_cons_peek;
+
+ /* Transmit shaping: allow 'credit_bytes' every 'credit_usec'. */
+ unsigned long credit_bytes;
+ unsigned long credit_usec;
+ unsigned long remaining_credit;
+ struct timer_list credit_timeout;
+
+ /* Statistics */
+ int nr_copied_skbs;
+ int rx_gso_checksum_fixup;
+
+ /* Miscellaneous private stuff. */
+ struct list_head list; /* scheduling list */
+ atomic_t refcnt;
+ struct net_device *dev;
+ struct net_device_stats stats;
+
+ unsigned int carrier;
+
+ wait_queue_head_t waiting_to_free;
+};
+
+/*
+ * Implement our own carrier flag: the network stack's version causes delays
+ * when the carrier is re-enabled (in particular, dev_activate() may not
+ * immediately be called, which can cause packet loss; also the etherbridge
+ * can be rather lazy in activating its port).
+ */
+#define netback_carrier_on(netif) ((netif)->carrier = 1)
+#define netback_carrier_off(netif) ((netif)->carrier = 0)
+#define netback_carrier_ok(netif) ((netif)->carrier)
+
+enum {
+ NETBK_DONT_COPY_SKB,
+ NETBK_DELAYED_COPY_SKB,
+ NETBK_ALWAYS_COPY_SKB,
+};
+
+extern int netbk_copy_skb_mode;
+
+struct backend_info {
+ struct xenbus_device *dev;
+ struct xen_netif *netif;
+ enum xenbus_state frontend_state;
+ struct xenbus_watch hotplug_status_watch;
+ int have_hotplug_status_watch:1;
+};
+
+#define NET_TX_RING_SIZE __RING_SIZE((struct xen_netif_tx_sring *)0, PAGE_SIZE)
+#define NET_RX_RING_SIZE __RING_SIZE((struct xen_netif_rx_sring *)0, PAGE_SIZE)
+
+void netif_disconnect(struct xen_netif *netif);
+
+void netif_set_features(struct xen_netif *netif);
+struct xen_netif *netif_alloc(struct device *parent, domid_t domid,
+ unsigned int handle);
+int netif_map(struct xen_netif *netif, unsigned long tx_ring_ref,
+ unsigned long rx_ring_ref, unsigned int evtchn);
+
+static inline void netif_get(struct xen_netif *netif)
+{
+ atomic_inc(&netif->refcnt);
+}
+
+static inline void netif_put(struct xen_netif *netif)
+{
+ if (atomic_dec_and_test(&netif->refcnt))
+ wake_up(&netif->waiting_to_free);
+}
+
+int netif_xenbus_init(void);
+
+#define netif_schedulable(netif) \
+ (netif_running((netif)->dev) && netback_carrier_ok(netif))
+
+void netif_schedule_work(struct xen_netif *netif);
+void netif_deschedule_work(struct xen_netif *netif);
+
+int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev);
+struct net_device_stats *netif_be_get_stats(struct net_device *dev);
+irqreturn_t netif_be_int(int irq, void *dev_id);
+
+static inline int netbk_can_queue(struct net_device *dev)
+{
+ struct xen_netif *netif = netdev_priv(dev);
+ return netif->can_queue;
+}
+
+static inline int netbk_can_sg(struct net_device *dev)
+{
+ struct xen_netif *netif = netdev_priv(dev);
+ return netif->can_sg;
+}
+
+struct pending_tx_info {
+ struct xen_netif_tx_request req;
+ struct xen_netif *netif;
+};
+typedef unsigned int pending_ring_idx_t;
+
+struct netbk_rx_meta {
+ int id;
+ int size;
+ int gso_size;
+};
+
+struct netbk_tx_pending_inuse {
+ struct list_head list;
+ unsigned long alloc_time;
+};
+
+#define MAX_PENDING_REQS 256
+
+#define MAX_BUFFER_OFFSET PAGE_SIZE
+
+/* extra field used in struct page */
+union page_ext {
+ struct {
+#if BITS_PER_LONG < 64
+#define IDX_WIDTH 8
+#define GROUP_WIDTH (BITS_PER_LONG - IDX_WIDTH)
+ unsigned int group:GROUP_WIDTH;
+ unsigned int idx:IDX_WIDTH;
+#else
+ unsigned int group, idx;
+#endif
+ } e;
+ void *mapping;
+};
+
+struct xen_netbk {
+ union {
+ struct {
+ struct tasklet_struct net_tx_tasklet;
+ struct tasklet_struct net_rx_tasklet;
+ } tasklet;
+
+ struct {
+ wait_queue_head_t netbk_action_wq;
+ struct task_struct *task;
+ } kthread;
+ };
+
+ struct sk_buff_head rx_queue;
+ struct sk_buff_head tx_queue;
+
+ struct timer_list net_timer;
+ struct timer_list netbk_tx_pending_timer;
+
+ struct page **mmap_pages;
+
+ pending_ring_idx_t pending_prod;
+ pending_ring_idx_t pending_cons;
+ pending_ring_idx_t dealloc_prod;
+ pending_ring_idx_t dealloc_cons;
+
+ struct list_head pending_inuse_head;
+ struct list_head net_schedule_list;
+
+ /* Protect the net_schedule_list in netif. */
+ spinlock_t net_schedule_list_lock;
+
+ atomic_t netfront_count;
+
+ struct pending_tx_info pending_tx_info[MAX_PENDING_REQS];
+ struct netbk_tx_pending_inuse pending_inuse[MAX_PENDING_REQS];
+ struct gnttab_unmap_grant_ref tx_unmap_ops[MAX_PENDING_REQS];
+ struct gnttab_map_grant_ref tx_map_ops[MAX_PENDING_REQS];
+
+ grant_handle_t grant_tx_handle[MAX_PENDING_REQS];
+ u16 pending_ring[MAX_PENDING_REQS];
+ u16 dealloc_ring[MAX_PENDING_REQS];
+
+ /*
+ * Each head or fragment can be up to 4096 bytes. Given
+ * MAX_BUFFER_OFFSET of 4096 the worst case is that each
+ * head/fragment uses 2 copy operation.
+ */
+ struct gnttab_copy grant_copy_op[2*NET_RX_RING_SIZE];
+ unsigned char rx_notify[NR_IRQS];
+ u16 notify_list[NET_RX_RING_SIZE];
+ struct netbk_rx_meta meta[2*NET_RX_RING_SIZE];
+};
+
+extern struct xen_netbk *xen_netbk;
+extern int xen_netbk_group_nr;
+
+#endif /* __XEN_NETBACK__COMMON_H__ */
diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c
new file mode 100644
index 0000000..b429f8c
--- /dev/null
+++ b/drivers/net/xen-netback/interface.c
@@ -0,0 +1,470 @@
+/*
+ * Network-device interface management.
+ *
+ * Copyright (c) 2004-2005, Keir Fraser
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation; or, when distributed
+ * separately from the Linux kernel or incorporated into other
+ * software packages, subject to the following license:
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "common.h"
+
+#include <linux/ethtool.h>
+#include <linux/rtnetlink.h>
+
+#include <xen/events.h>
+#include <asm/xen/hypercall.h>
+
+/*
+ * Module parameter 'queue_length':
+ *
+ * Enables queuing in the network stack when a client has run out of receive
+ * descriptors.
+ */
+static unsigned long netbk_queue_length = 32;
+module_param_named(queue_length, netbk_queue_length, ulong, 0644);
+
+static void netbk_add_netif(struct xen_netbk *netbk, int group_nr,
+ struct xen_netif *netif)
+{
+ int i;
+ int min_netfront_count;
+ int min_group = 0;
+ min_netfront_count = atomic_read(&netbk[0].netfront_count);
+ for (i = 0; i < group_nr; i++) {
+ int netfront_count = atomic_read(&netbk[i].netfront_count);
+ if (netfront_count < min_netfront_count) {
+ min_group = i;
+ min_netfront_count = netfront_count;
+ }
+ }
+
+ netif->group = min_group;
+ atomic_inc(&netbk[netif->group].netfront_count);
+}
+
+static void netbk_remove_netif(struct xen_netbk *netbk, struct xen_netif *netif)
+{
+ atomic_dec(&netbk[netif->group].netfront_count);
+}
+
+static void __netif_up(struct xen_netif *netif)
+{
+ netbk_add_netif(xen_netbk, xen_netbk_group_nr, netif);
+ enable_irq(netif->irq);
+ netif_schedule_work(netif);
+}
+
+static void __netif_down(struct xen_netif *netif)
+{
+ disable_irq(netif->irq);
+ netif_deschedule_work(netif);
+ netbk_remove_netif(xen_netbk, netif);
+}
+
+static int net_open(struct net_device *dev)
+{
+ struct xen_netif *netif = netdev_priv(dev);
+ if (netback_carrier_ok(netif)) {
+ __netif_up(netif);
+ netif_start_queue(dev);
+ }
+ return 0;
+}
+
+static int net_close(struct net_device *dev)
+{
+ struct xen_netif *netif = netdev_priv(dev);
+ if (netback_carrier_ok(netif))
+ __netif_down(netif);
+ netif_stop_queue(dev);
+ return 0;
+}
+
+static int netbk_change_mtu(struct net_device *dev, int mtu)
+{
+ int max = netbk_can_sg(dev) ? 65535 - ETH_HLEN : ETH_DATA_LEN;
+
+ if (mtu > max)
+ return -EINVAL;
+ dev->mtu = mtu;
+ return 0;
+}
+
+void netif_set_features(struct xen_netif *netif)
+{
+ struct net_device *dev = netif->dev;
+ int features = dev->features;
+
+ if (netif->can_sg)
+ features |= NETIF_F_SG;
+ if (netif->gso || netif->gso_prefix)
+ features |= NETIF_F_TSO;
+ if (netif->csum)
+ features |= NETIF_F_IP_CSUM;
+
+ features &= ~(netif->features_disabled);
+
+ if (!(features & NETIF_F_SG) && dev->mtu > ETH_DATA_LEN)
+ dev->mtu = ETH_DATA_LEN;
+
+ dev->features = features;
+}
+
+static int netbk_set_tx_csum(struct net_device *dev, u32 data)
+{
+ struct xen_netif *netif = netdev_priv(dev);
+ if (data) {
+ if (!netif->csum)
+ return -ENOSYS;
+ netif->features_disabled &= ~NETIF_F_IP_CSUM;
+ } else {
+ netif->features_disabled |= NETIF_F_IP_CSUM;
+ }
+
+ netif_set_features(netif);
+ return 0;
+}
+
+static int netbk_set_sg(struct net_device *dev, u32 data)
+{
+ struct xen_netif *netif = netdev_priv(dev);
+ if (data) {
+ if (!netif->can_sg)
+ return -ENOSYS;
+ netif->features_disabled &= ~NETIF_F_SG;
+ } else {
+ netif->features_disabled |= NETIF_F_SG;
+ }
+
+ netif_set_features(netif);
+ return 0;
+}
+
+static int netbk_set_tso(struct net_device *dev, u32 data)
+{
+ struct xen_netif *netif = netdev_priv(dev);
+ if (data) {
+ if (!netif->gso && !netif->gso_prefix)
+ return -ENOSYS;
+ netif->features_disabled &= ~NETIF_F_TSO;
+ } else {
+ netif->features_disabled |= NETIF_F_TSO;
+ }
+
+ netif_set_features(netif);
+ return 0;
+}
+
+static void netbk_get_drvinfo(struct net_device *dev,
+ struct ethtool_drvinfo *info)
+{
+ strcpy(info->driver, "netbk");
+ strcpy(info->bus_info, dev_name(dev->dev.parent));
+}
+
+static const struct netif_stat {
+ char name[ETH_GSTRING_LEN];
+ u16 offset;
+} netbk_stats[] = {
+ {
+ "copied_skbs",
+ offsetof(struct xen_netif, nr_copied_skbs)
+ },
+ {
+ "rx_gso_checksum_fixup",
+ offsetof(struct xen_netif, rx_gso_checksum_fixup)
+ },
+};
+
+static int netbk_get_sset_count(struct net_device *dev, int string_set)
+{
+ switch (string_set) {
+ case ETH_SS_STATS:
+ return ARRAY_SIZE(netbk_stats);
+ default:
+ return -EINVAL;
+ }
+}
+
+static void netbk_get_ethtool_stats(struct net_device *dev,
+ struct ethtool_stats *stats, u64 * data)
+{
+ void *netif = netdev_priv(dev);
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(netbk_stats); i++)
+ data[i] = *(int *)(netif + netbk_stats[i].offset);
+}
+
+static void netbk_get_strings(struct net_device *dev, u32 stringset, u8 * data)
+{
+ int i;
+
+ switch (stringset) {
+ case ETH_SS_STATS:
+ for (i = 0; i < ARRAY_SIZE(netbk_stats); i++)
+ memcpy(data + i * ETH_GSTRING_LEN,
+ netbk_stats[i].name, ETH_GSTRING_LEN);
+ break;
+ }
+}
+
+static struct ethtool_ops network_ethtool_ops = {
+ .get_drvinfo = netbk_get_drvinfo,
+
+ .get_tx_csum = ethtool_op_get_tx_csum,
+ .set_tx_csum = netbk_set_tx_csum,
+ .get_sg = ethtool_op_get_sg,
+ .set_sg = netbk_set_sg,
+ .get_tso = ethtool_op_get_tso,
+ .set_tso = netbk_set_tso,
+ .get_link = ethtool_op_get_link,
+
+ .get_sset_count = netbk_get_sset_count,
+ .get_ethtool_stats = netbk_get_ethtool_stats,
+ .get_strings = netbk_get_strings,
+};
+
+static struct net_device_ops netback_ops = {
+ .ndo_start_xmit = netif_be_start_xmit,
+ .ndo_get_stats = netif_be_get_stats,
+ .ndo_open = net_open,
+ .ndo_stop = net_close,
+ .ndo_change_mtu = netbk_change_mtu,
+};
+
+struct xen_netif *netif_alloc(struct device *parent, domid_t domid,
+ unsigned int handle)
+{
+ int err = 0;
+ struct net_device *dev;
+ struct xen_netif *netif;
+ char name[IFNAMSIZ] = {};
+
+ snprintf(name, IFNAMSIZ - 1, "vif%u.%u", domid, handle);
+ dev = alloc_netdev(sizeof(struct xen_netif), name, ether_setup);
+ if (dev == NULL) {
+ pr_debug("Could not allocate netdev\n");
+ return ERR_PTR(-ENOMEM);
+ }
+
+ SET_NETDEV_DEV(dev, parent);
+
+ netif = netdev_priv(dev);
+ memset(netif, 0, sizeof(*netif));
+ netif->domid = domid;
+ netif->group = -1;
+ netif->handle = handle;
+ netif->can_sg = 1;
+ netif->csum = 1;
+ atomic_set(&netif->refcnt, 1);
+ init_waitqueue_head(&netif->waiting_to_free);
+ netif->dev = dev;
+ INIT_LIST_HEAD(&netif->list);
+
+ netback_carrier_off(netif);
+
+ netif->credit_bytes = netif->remaining_credit = ~0UL;
+ netif->credit_usec = 0UL;
+ init_timer(&netif->credit_timeout);
+ /* Initialize 'expires' now: it's used to track the credit window. */
+ netif->credit_timeout.expires = jiffies;
+
+ dev->netdev_ops = &netback_ops;
+ netif_set_features(netif);
+ SET_ETHTOOL_OPS(dev, &network_ethtool_ops);
+
+ dev->tx_queue_len = netbk_queue_length;
+
+ /*
+ * Initialise a dummy MAC address. We choose the numerically
+ * largest non-broadcast address to prevent the address getting
+ * stolen by an Ethernet bridge for STP purposes.
+ * (FE:FF:FF:FF:FF:FF)
+ */
+ memset(dev->dev_addr, 0xFF, ETH_ALEN);
+ dev->dev_addr[0] &= ~0x01;
+
+ rtnl_lock();
+ err = register_netdevice(dev);
+ rtnl_unlock();
+ if (err) {
+ pr_debug("Could not register new net device %s: err=%d\n",
+ dev->name, err);
+ free_netdev(dev);
+ return ERR_PTR(err);
+ }
+
+ pr_debug("Successfully created netif\n");
+ return netif;
+}
+
+static int map_frontend_pages(struct xen_netif *netif,
+ grant_ref_t tx_ring_ref,
+ grant_ref_t rx_ring_ref)
+{
+ struct gnttab_map_grant_ref op;
+
+ gnttab_set_map_op(&op, (unsigned long)netif->tx_comms_area->addr,
+ GNTMAP_host_map, tx_ring_ref, netif->domid);
+
+ if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1))
+ BUG();
+
+ if (op.status) {
+ pr_debug("Gnttab failure mapping tx_ring_ref!\n");
+ return op.status;
+ }
+
+ netif->tx_shmem_ref = tx_ring_ref;
+ netif->tx_shmem_handle = op.handle;
+
+ gnttab_set_map_op(&op, (unsigned long)netif->rx_comms_area->addr,
+ GNTMAP_host_map, rx_ring_ref, netif->domid);
+
+ if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1))
+ BUG();
+
+ if (op.status) {
+ struct gnttab_unmap_grant_ref unop;
+
+ gnttab_set_unmap_op(&unop,
+ (unsigned long)netif->tx_comms_area->addr,
+ GNTMAP_host_map, netif->tx_shmem_handle);
+ HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &unop, 1);
+ pr_debug("Gnttab failure mapping rx_ring_ref!\n");
+ return op.status;
+ }
+
+ netif->rx_shmem_ref = rx_ring_ref;
+ netif->rx_shmem_handle = op.handle;
+
+ return 0;
+}
+
+static void unmap_frontend_pages(struct xen_netif *netif)
+{
+ struct gnttab_unmap_grant_ref op;
+
+ gnttab_set_unmap_op(&op, (unsigned long)netif->tx_comms_area->addr,
+ GNTMAP_host_map, netif->tx_shmem_handle);
+
+ if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1))
+ BUG();
+
+ gnttab_set_unmap_op(&op, (unsigned long)netif->rx_comms_area->addr,
+ GNTMAP_host_map, netif->rx_shmem_handle);
+
+ if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1))
+ BUG();
+}
+
+int netif_map(struct xen_netif *netif, unsigned long tx_ring_ref,
+ unsigned long rx_ring_ref, unsigned int evtchn)
+{
+ int err = -ENOMEM;
+ struct xen_netif_tx_sring *txs;
+ struct xen_netif_rx_sring *rxs;
+
+ /* Already connected through? */
+ if (netif->irq)
+ return 0;
+
+ netif->tx_comms_area = alloc_vm_area(PAGE_SIZE);
+ if (netif->tx_comms_area == NULL)
+ return -ENOMEM;
+ netif->rx_comms_area = alloc_vm_area(PAGE_SIZE);
+ if (netif->rx_comms_area == NULL)
+ goto err_rx;
+
+ err = map_frontend_pages(netif, tx_ring_ref, rx_ring_ref);
+ if (err)
+ goto err_map;
+
+ err = bind_interdomain_evtchn_to_irqhandler(
+ netif->domid, evtchn, netif_be_int, 0,
+ netif->dev->name, netif);
+ if (err < 0)
+ goto err_hypervisor;
+ netif->irq = err;
+ disable_irq(netif->irq);
+
+ txs = (struct xen_netif_tx_sring *)netif->tx_comms_area->addr;
+ BACK_RING_INIT(&netif->tx, txs, PAGE_SIZE);
+
+	rxs = (struct xen_netif_rx_sring *)netif->rx_comms_area->addr;
+ BACK_RING_INIT(&netif->rx, rxs, PAGE_SIZE);
+
+ netif->rx_req_cons_peek = 0;
+
+ netif_get(netif);
+
+ rtnl_lock();
+ netback_carrier_on(netif);
+ if (netif_running(netif->dev))
+ __netif_up(netif);
+ rtnl_unlock();
+
+ return 0;
+err_hypervisor:
+ unmap_frontend_pages(netif);
+err_map:
+ free_vm_area(netif->rx_comms_area);
+err_rx:
+ free_vm_area(netif->tx_comms_area);
+ return err;
+}
+
+void netif_disconnect(struct xen_netif *netif)
+{
+ if (netback_carrier_ok(netif)) {
+ rtnl_lock();
+ netback_carrier_off(netif);
+ netif_carrier_off(netif->dev); /* discard queued packets */
+ if (netif_running(netif->dev))
+ __netif_down(netif);
+ rtnl_unlock();
+ netif_put(netif);
+ }
+
+ atomic_dec(&netif->refcnt);
+ wait_event(netif->waiting_to_free, atomic_read(&netif->refcnt) == 0);
+
+ del_timer_sync(&netif->credit_timeout);
+
+ if (netif->irq)
+ unbind_from_irqhandler(netif->irq, netif);
+
+ unregister_netdev(netif->dev);
+
+ if (netif->tx.sring) {
+ unmap_frontend_pages(netif);
+ free_vm_area(netif->tx_comms_area);
+ free_vm_area(netif->rx_comms_area);
+ }
+
+ free_netdev(netif->dev);
+}
diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c
new file mode 100644
index 0000000..b290525
--- /dev/null
+++ b/drivers/net/xen-netback/netback.c
@@ -0,0 +1,1934 @@
+/*
+ * Back-end of the driver for virtual network devices. This portion of the
+ * driver exports a 'unified' network-device interface that can be accessed
+ * by any operating system that implements a compatible front end. A
+ * reference front-end implementation can be found in:
+ * drivers/net/xen-netfront.c
+ *
+ * Copyright (c) 2002-2005, K A Fraser
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation; or, when distributed
+ * separately from the Linux kernel or incorporated into other
+ * software packages, subject to the following license:
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "common.h"
+
+#include <linux/kthread.h>
+#include <linux/if_vlan.h>
+#include <linux/udp.h>
+
+#include <net/tcp.h>
+
+#include <xen/balloon.h>
+#include <xen/events.h>
+#include <xen/interface/memory.h>
+
+#include <asm/xen/hypercall.h>
+#include <asm/xen/page.h>
+
+/*define NETBE_DEBUG_INTERRUPT*/
+
+struct xen_netbk *xen_netbk;
+int xen_netbk_group_nr;
+
+static void netif_idx_release(struct xen_netbk *netbk, u16 pending_idx);
+static void make_tx_response(struct xen_netif *netif,
+ struct xen_netif_tx_request *txp,
+ s8 st);
+static struct xen_netif_rx_response *make_rx_response(struct xen_netif *netif,
+ u16 id,
+ s8 st,
+ u16 offset,
+ u16 size,
+ u16 flags);
+
+static void net_tx_action(unsigned long data);
+
+static void net_rx_action(unsigned long data);
+
+static inline unsigned long idx_to_pfn(struct xen_netbk *netbk,
+ unsigned int idx)
+{
+ return page_to_pfn(netbk->mmap_pages[idx]);
+}
+
+static inline unsigned long idx_to_kaddr(struct xen_netbk *netbk,
+ unsigned int idx)
+{
+ return (unsigned long)pfn_to_kaddr(idx_to_pfn(netbk, idx));
+}
+
+/* extra field used in struct page */
+static inline void netif_set_page_ext(struct page *pg,
+ unsigned int group, unsigned int idx)
+{
+ union page_ext ext = { .e = { .group = group + 1, .idx = idx } };
+
+ BUILD_BUG_ON(sizeof(ext) > sizeof(ext.mapping));
+ pg->mapping = ext.mapping;
+}
+
+static int netif_get_page_ext(struct page *pg,
+ unsigned int *_group, unsigned int *_idx)
+{
+ union page_ext ext = { .mapping = pg->mapping };
+ struct xen_netbk *netbk;
+ unsigned int group, idx;
+
+ if (!PageForeign(pg))
+ return 0;
+
+ group = ext.e.group - 1;
+
+ if (group < 0 || group >= xen_netbk_group_nr)
+ return 0;
+
+ netbk = &xen_netbk[group];
+
+ if (netbk->mmap_pages == NULL)
+ return 0;
+
+ idx = ext.e.idx;
+
+ if ((idx < 0) || (idx >= MAX_PENDING_REQS))
+ return 0;
+
+ if (netbk->mmap_pages[idx] != pg)
+ return 0;
+
+ *_group = group;
+ *_idx = idx;
+
+ return 1;
+}
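+
+/*
+ * The group is stored biased by one, presumably so that a page whose
+ * mapping field was never set (and therefore reads back as zero) can
+ * never be mistaken for a valid group 0 / index 0 entry.
+ */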
+
+/*
+ * This is the amount of packet we copy rather than map, so that the
+ * guest can't fiddle with the contents of the headers while we do
+ * packet processing on them (netfilter, routing, etc).
+ */
+#define PKT_PROT_LEN (ETH_HLEN + \
+ VLAN_HLEN + \
+ sizeof(struct iphdr) + MAX_IPOPTLEN + \
+ sizeof(struct tcphdr) + MAX_TCP_OPTION_SPACE)
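+
+/*
+ * For a sense of scale (assuming the usual header sizes): 14 (ETH_HLEN)
+ * + 4 (VLAN_HLEN) + 20 + 40 (IP header plus maximum options) + 20 + 40
+ * (TCP header plus maximum options) makes PKT_PROT_LEN 138 bytes.
+ */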
+
+static inline pending_ring_idx_t pending_index(unsigned i)
+{
+ return i & (MAX_PENDING_REQS-1);
+}
+
+static inline pending_ring_idx_t nr_pending_reqs(struct xen_netbk *netbk)
+{
+ return MAX_PENDING_REQS -
+ netbk->pending_prod + netbk->pending_cons;
+}
+
+/* Setting this allows the safe use of this driver without netloop. */
+static int MODPARM_copy_skb = 1;
+module_param_named(copy_skb, MODPARM_copy_skb, bool, 0);
+MODULE_PARM_DESC(copy_skb, "Copy data received from netfront without netloop");
+
+int netbk_copy_skb_mode;
+
+static int MODPARM_netback_kthread;
+module_param_named(netback_kthread, MODPARM_netback_kthread, bool, 0);
+MODULE_PARM_DESC(netback_kthread, "Use kernel thread to replace tasklet");
+
+/*
+ * Netback bottom half handler.
+ * dir indicates the data direction.
+ * rx: 1, tx: 0.
+ */
+static inline void xen_netbk_bh_handler(struct xen_netbk *netbk, int dir)
+{
+ if (MODPARM_netback_kthread)
+ wake_up(&netbk->kthread.netbk_action_wq);
+ else if (dir)
+ tasklet_schedule(&netbk->tasklet.net_rx_tasklet);
+ else
+ tasklet_schedule(&netbk->tasklet.net_tx_tasklet);
+}
+
+static inline void maybe_schedule_tx_action(struct xen_netbk *netbk)
+{
+ smp_mb();
+ if ((nr_pending_reqs(netbk) < (MAX_PENDING_REQS/2)) &&
+ !list_empty(&netbk->net_schedule_list))
+ xen_netbk_bh_handler(netbk, 0);
+}
+
+static struct sk_buff *netbk_copy_skb(struct sk_buff *skb)
+{
+ struct skb_shared_info *ninfo;
+ struct sk_buff *nskb;
+ unsigned long offset;
+ int ret;
+ int len;
+ int headlen;
+
+ BUG_ON(skb_shinfo(skb)->frag_list != NULL);
+
+ nskb = alloc_skb(SKB_MAX_HEAD(0), GFP_ATOMIC | __GFP_NOWARN);
+ if (unlikely(!nskb))
+ goto err;
+
+ skb_reserve(nskb, NET_SKB_PAD + NET_IP_ALIGN);
+ headlen = skb_end_pointer(nskb) - nskb->data;
+ if (headlen > skb_headlen(skb))
+ headlen = skb_headlen(skb);
+ ret = skb_copy_bits(skb, 0, __skb_put(nskb, headlen), headlen);
+ BUG_ON(ret);
+
+ ninfo = skb_shinfo(nskb);
+ ninfo->gso_size = skb_shinfo(skb)->gso_size;
+ ninfo->gso_type = skb_shinfo(skb)->gso_type;
+
+ offset = headlen;
+ len = skb->len - headlen;
+
+ nskb->len = skb->len;
+ nskb->data_len = len;
+ nskb->truesize += len;
+
+ while (len) {
+ struct page *page;
+ int copy;
+ int zero;
+
+ if (unlikely(ninfo->nr_frags >= MAX_SKB_FRAGS)) {
+ dump_stack();
+ goto err_free;
+ }
+
+ copy = len >= PAGE_SIZE ? PAGE_SIZE : len;
+ zero = len >= PAGE_SIZE ? 0 : __GFP_ZERO;
+
+ page = alloc_page(GFP_ATOMIC | __GFP_NOWARN | zero);
+ if (unlikely(!page))
+ goto err_free;
+
+ ret = skb_copy_bits(skb, offset, page_address(page), copy);
+ BUG_ON(ret);
+
+ ninfo->frags[ninfo->nr_frags].page = page;
+ ninfo->frags[ninfo->nr_frags].page_offset = 0;
+ ninfo->frags[ninfo->nr_frags].size = copy;
+ ninfo->nr_frags++;
+
+ offset += copy;
+ len -= copy;
+ }
+
+#ifdef NET_SKBUFF_DATA_USES_OFFSET
+ offset = 0;
+#else
+ offset = nskb->data - skb->data;
+#endif
+
+ nskb->transport_header = skb->transport_header + offset;
+ nskb->network_header = skb->network_header + offset;
+ nskb->mac_header = skb->mac_header + offset;
+
+ return nskb;
+
+ err_free:
+ kfree_skb(nskb);
+ err:
+ return NULL;
+}
+
+static inline int netbk_max_required_rx_slots(struct xen_netif *netif)
+{
+ if (netif->can_sg || netif->gso || netif->gso_prefix)
+ return MAX_SKB_FRAGS + 2; /* header + extra_info + frags */
+ return 1; /* all in one */
+}
+
+static inline int netbk_queue_full(struct xen_netif *netif)
+{
+ RING_IDX peek = netif->rx_req_cons_peek;
+ RING_IDX needed = netbk_max_required_rx_slots(netif);
+
+ return ((netif->rx.sring->req_prod - peek) < needed) ||
+ ((netif->rx.rsp_prod_pvt + NET_RX_RING_SIZE - peek) < needed);
+}
+
+/*
+ * Returns true if we should start a new receive buffer instead of
+ * adding 'size' bytes to a buffer which currently contains 'offset'
+ * bytes.
+ */
+static bool start_new_rx_buffer(int offset, unsigned long size, int head)
+{
+ /* simple case: we have completely filled the current buffer. */
+ if (offset == MAX_BUFFER_OFFSET)
+ return true;
+
+ /*
+ * complex case: start a fresh buffer if the current frag
+ * would overflow the current buffer but only if:
+ * (i) this frag would fit completely in the next buffer
+ * and (ii) there is already some data in the current buffer
+ * and (iii) this is not the head buffer.
+ *
+ * Where:
+ * - (i) stops us splitting a frag into two copies
+ * unless the frag is too large for a single buffer.
+ * - (ii) stops us from leaving a buffer pointlessly empty.
+ * - (iii) stops us leaving the first buffer
+ * empty. Strictly speaking this is already covered
+ * by (ii) but is explicitly checked because
+ * netfront relies on the first buffer being
+ * non-empty and can crash otherwise.
+ *
+	 * This means we will effectively linearise small
+	 * frags but do not needlessly split large buffers
+	 * into multiple copies; large frags tend to get
+	 * their own buffers as before.
+ */
+ if ((offset + size > MAX_BUFFER_OFFSET) &&
+ (size <= MAX_BUFFER_OFFSET) && offset && !head)
+ return true;
+
+ return false;
+}
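+
+/*
+ * Illustrative example (taking MAX_BUFFER_OFFSET to be PAGE_SIZE, 4096):
+ * with 3000 bytes already in a non-head buffer, a further 2000-byte frag
+ * starts a fresh buffer, since it would overflow this one but fits whole
+ * in the next; a 5000-byte frag is instead split across buffers because
+ * it can never fit in a single one.
+ */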
+
+/*
+ * Figure out how many ring slots we're going to need to send @skb to
+ * the guest. This function is essentially a dry run of
+ * netbk_gop_frag_copy.
+ */
+static unsigned int count_skb_slots(struct sk_buff *skb, struct xen_netif *netif)
+{
+ unsigned int count = 1;
+ int i, copy_off = 0;
+
+ BUG_ON(offset_in_page(skb->data)+skb_headlen(skb) > MAX_BUFFER_OFFSET);
+
+ copy_off = skb_headlen(skb);
+
+ if (skb_shinfo(skb)->gso_size)
+ count++;
+
+ for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+ unsigned long size = skb_shinfo(skb)->frags[i].size;
+ unsigned long bytes;
+ while (size > 0) {
+ BUG_ON(copy_off > MAX_BUFFER_OFFSET);
+
+ if (start_new_rx_buffer(copy_off, size, 0)) {
+ count++;
+ copy_off = 0;
+ }
+
+ bytes = size;
+ if (copy_off + bytes > MAX_BUFFER_OFFSET)
+ bytes = MAX_BUFFER_OFFSET - copy_off;
+
+ copy_off += bytes;
+ size -= bytes;
+ }
+ }
+ return count;
+}
+
+int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+ struct xen_netif *netif = netdev_priv(dev);
+ struct xen_netbk *netbk;
+
+ BUG_ON(skb->dev != dev);
+
+ if (netif->group == -1)
+ goto drop;
+
+ netbk = &xen_netbk[netif->group];
+
+ /* Drop the packet if the target domain has no receive buffers. */
+ if (unlikely(!netif_schedulable(netif) || netbk_queue_full(netif)))
+ goto drop;
+
+ /*
+ * XXX For now we also copy skbuffs whose head crosses a page
+ * boundary, because netbk_gop_skb can't handle them.
+ */
+ if ((skb_headlen(skb) + offset_in_page(skb->data)) >= PAGE_SIZE) {
+ struct sk_buff *nskb = netbk_copy_skb(skb);
+ if (unlikely(nskb == NULL))
+ goto drop;
+ /* Copy only the header fields we use in this driver. */
+ nskb->dev = skb->dev;
+ nskb->ip_summed = skb->ip_summed;
+ dev_kfree_skb(skb);
+ skb = nskb;
+ }
+
+ /* Reserve ring slots for the worst-case number of fragments. */
+ netif->rx_req_cons_peek += count_skb_slots(skb, netif);
+ netif_get(netif);
+
+ if (netbk_can_queue(dev) && netbk_queue_full(netif)) {
+ netif->rx.sring->req_event = netif->rx_req_cons_peek +
+ netbk_max_required_rx_slots(netif);
+ mb(); /* request notification /then/ check & stop the queue */
+ if (netbk_queue_full(netif))
+ netif_stop_queue(dev);
+ }
+ skb_queue_tail(&netbk->rx_queue, skb);
+
+ xen_netbk_bh_handler(netbk, 1);
+
+ return 0;
+
+ drop:
+ netif->stats.tx_dropped++;
+ dev_kfree_skb(skb);
+ return 0;
+}
+
+struct netrx_pending_operations {
+ unsigned copy_prod, copy_cons;
+ unsigned meta_prod, meta_cons;
+ struct gnttab_copy *copy;
+ struct netbk_rx_meta *meta;
+ int copy_off;
+ grant_ref_t copy_gref;
+};
+
+static struct netbk_rx_meta *get_next_rx_buffer(struct xen_netif *netif,
+ struct netrx_pending_operations *npo)
+{
+ struct netbk_rx_meta *meta;
+ struct xen_netif_rx_request *req;
+
+ req = RING_GET_REQUEST(&netif->rx, netif->rx.req_cons++);
+
+ meta = npo->meta + npo->meta_prod++;
+ meta->gso_size = 0;
+ meta->size = 0;
+ meta->id = req->id;
+
+ npo->copy_off = 0;
+ npo->copy_gref = req->gref;
+
+ return meta;
+}
+
+/*
+ * Set up the grant copy operations for this fragment. The data may be
+ * split across several receive buffers.
+ */
+static void netbk_gop_frag_copy(struct xen_netif *netif,
+ struct netrx_pending_operations *npo,
+ struct page *page, unsigned long size,
+ unsigned long offset, int head)
+{
+ struct gnttab_copy *copy_gop;
+ struct netbk_rx_meta *meta;
+ /*
+	 * These variables are used iff netif_get_page_ext returns true,
+ * in which case they are guaranteed to be initialized.
+ */
+ unsigned int uninitialized_var(group), uninitialized_var(idx);
+ int foreign = netif_get_page_ext(page, &group, &idx);
+ unsigned long bytes;
+
+ /* Data must not cross a page boundary. */
+ BUG_ON(size + offset > PAGE_SIZE);
+
+ meta = npo->meta + npo->meta_prod - 1;
+
+ while (size > 0) {
+ BUG_ON(npo->copy_off > MAX_BUFFER_OFFSET);
+
+ if (start_new_rx_buffer(npo->copy_off, size, head)) {
+ /*
+ * Netfront requires there to be some data in the head
+ * buffer.
+ */
+ BUG_ON(head);
+
+ meta = get_next_rx_buffer(netif, npo);
+ }
+
+ bytes = size;
+ if (npo->copy_off + bytes > MAX_BUFFER_OFFSET)
+ bytes = MAX_BUFFER_OFFSET - npo->copy_off;
+
+ copy_gop = npo->copy + npo->copy_prod++;
+ copy_gop->flags = GNTCOPY_dest_gref;
+ if (foreign) {
+ struct xen_netbk *netbk = &xen_netbk[group];
+ struct pending_tx_info *src_pend;
+
+ src_pend = &netbk->pending_tx_info[idx];
+
+ copy_gop->source.domid = src_pend->netif->domid;
+ copy_gop->source.u.ref = src_pend->req.gref;
+ copy_gop->flags |= GNTCOPY_source_gref;
+ } else {
+ void *vaddr = page_address(page);
+ copy_gop->source.domid = DOMID_SELF;
+ copy_gop->source.u.gmfn = virt_to_mfn(vaddr);
+ }
+ copy_gop->source.offset = offset;
+ copy_gop->dest.domid = netif->domid;
+
+ copy_gop->dest.offset = npo->copy_off;
+ copy_gop->dest.u.ref = npo->copy_gref;
+ copy_gop->len = bytes;
+
+ npo->copy_off += bytes;
+ meta->size += bytes;
+
+ offset += bytes;
+ size -= bytes;
+ head = 0; /* There must be something in this buffer now. */
+ }
+}
+
+/*
+ * Prepare an SKB to be transmitted to the frontend.
+ *
+ * This function is responsible for allocating grant operations, meta
+ * structures, etc.
+ *
+ * It returns the number of meta structures consumed. The number of
+ * ring slots used is always equal to the number of meta slots used
+ * plus the number of GSO descriptors used. Currently, we use either
+ * zero GSO descriptors (for non-GSO packets) or one descriptor (for
+ * frontend-side LRO).
+ */
+static int netbk_gop_skb(struct sk_buff *skb,
+ struct netrx_pending_operations *npo)
+{
+ struct xen_netif *netif = netdev_priv(skb->dev);
+ int nr_frags = skb_shinfo(skb)->nr_frags;
+ int i;
+ struct xen_netif_rx_request *req;
+ struct netbk_rx_meta *meta;
+ int old_meta_prod;
+
+ old_meta_prod = npo->meta_prod;
+
+ /* Set up a GSO prefix descriptor, if necessary */
+ if (skb_shinfo(skb)->gso_size && netif->gso_prefix) {
+ req = RING_GET_REQUEST(&netif->rx, netif->rx.req_cons++);
+ meta = npo->meta + npo->meta_prod++;
+ meta->gso_size = skb_shinfo(skb)->gso_size;
+ meta->size = 0;
+ meta->id = req->id;
+ }
+
+ req = RING_GET_REQUEST(&netif->rx, netif->rx.req_cons++);
+ meta = npo->meta + npo->meta_prod++;
+
+ if (!netif->gso_prefix)
+ meta->gso_size = skb_shinfo(skb)->gso_size;
+ else
+ meta->gso_size = 0;
+
+ meta->size = 0;
+ meta->id = req->id;
+ npo->copy_off = 0;
+ npo->copy_gref = req->gref;
+
+ netbk_gop_frag_copy(netif,
+ npo, virt_to_page(skb->data),
+ skb_headlen(skb),
+ offset_in_page(skb->data), 1);
+
+ /* Leave a gap for the GSO descriptor. */
+ if (skb_shinfo(skb)->gso_size && !netif->gso_prefix)
+ netif->rx.req_cons++;
+
+ for (i = 0; i < nr_frags; i++) {
+ netbk_gop_frag_copy(netif, npo,
+ skb_shinfo(skb)->frags[i].page,
+ skb_shinfo(skb)->frags[i].size,
+ skb_shinfo(skb)->frags[i].page_offset,
+ 0);
+ }
+
+ return npo->meta_prod - old_meta_prod;
+}
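+
+/*
+ * For example, a small GSO packet whose data fits in a single receive
+ * buffer consumes one meta slot but two ring slots: the data buffer plus
+ * the gap left above for the GSO extra-info descriptor.
+ */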
+
+/*
+ * This is a twin to netbk_gop_skb. Assume that netbk_gop_skb was
+ * used to set up the operations on the top of
+ * netrx_pending_operations, which have since been done. Check that
+ * they didn't give any errors and advance over them.
+ */
+static int netbk_check_gop(int nr_meta_slots, domid_t domid,
+ struct netrx_pending_operations *npo)
+{
+ struct gnttab_copy *copy_op;
+ int status = NETIF_RSP_OKAY;
+ int i;
+
+ for (i = 0; i < nr_meta_slots; i++) {
+ copy_op = npo->copy + npo->copy_cons++;
+ if (copy_op->status != GNTST_okay) {
+ pr_debug("Bad status %d from copy to DOM%d.\n",
+ copy_op->status, domid);
+ status = NETIF_RSP_ERROR;
+ }
+ }
+
+ return status;
+}
+
+static void netbk_add_frag_responses(struct xen_netif *netif, int status,
+ struct netbk_rx_meta *meta,
+ int nr_meta_slots)
+{
+ int i;
+ unsigned long offset;
+
+ /* No fragments used */
+ if (nr_meta_slots <= 1)
+ return;
+
+ nr_meta_slots--;
+
+ for (i = 0; i < nr_meta_slots; i++) {
+ int flags;
+ if (i == nr_meta_slots - 1)
+ flags = 0;
+ else
+ flags = NETRXF_more_data;
+
+ offset = 0;
+ make_rx_response(netif, meta[i].id, status, offset,
+ meta[i].size, flags);
+ }
+}
+
+struct skb_cb_overlay {
+ int meta_slots_used;
+};
+
+static void net_rx_action(unsigned long data)
+{
+ struct xen_netif *netif = NULL;
+ struct xen_netbk *netbk = (struct xen_netbk *)data;
+ s8 status;
+ u16 irq, flags;
+ struct xen_netif_rx_response *resp;
+ struct sk_buff_head rxq;
+ struct sk_buff *skb;
+ int notify_nr = 0;
+ int ret;
+ int nr_frags;
+ int count;
+ unsigned long offset;
+ struct skb_cb_overlay *sco;
+
+ struct netrx_pending_operations npo = {
+ .copy = netbk->grant_copy_op,
+ .meta = netbk->meta,
+ };
+
+ skb_queue_head_init(&rxq);
+
+ count = 0;
+
+ while ((skb = skb_dequeue(&netbk->rx_queue)) != NULL) {
+ netif = netdev_priv(skb->dev);
+ nr_frags = skb_shinfo(skb)->nr_frags;
+
+ sco = (struct skb_cb_overlay *)skb->cb;
+ sco->meta_slots_used = netbk_gop_skb(skb, &npo);
+
+ count += nr_frags + 1;
+
+ __skb_queue_tail(&rxq, skb);
+
+ /* Filled the batch queue? */
+ if (count + MAX_SKB_FRAGS >= NET_RX_RING_SIZE)
+ break;
+ }
+
+ BUG_ON(npo.meta_prod > ARRAY_SIZE(netbk->meta));
+
+ if (!npo.copy_prod)
+ return;
+
+ BUG_ON(npo.copy_prod > ARRAY_SIZE(netbk->grant_copy_op));
+ ret = HYPERVISOR_grant_table_op(GNTTABOP_copy, &netbk->grant_copy_op,
+ npo.copy_prod);
+ BUG_ON(ret != 0);
+
+ while ((skb = __skb_dequeue(&rxq)) != NULL) {
+ sco = (struct skb_cb_overlay *)skb->cb;
+
+ netif = netdev_priv(skb->dev);
+
+ if (netbk->meta[npo.meta_cons].gso_size && netif->gso_prefix) {
+ resp = RING_GET_RESPONSE(&netif->rx,
+ netif->rx.rsp_prod_pvt++);
+
+ resp->flags = NETRXF_gso_prefix | NETRXF_more_data;
+
+ resp->offset = netbk->meta[npo.meta_cons].gso_size;
+ resp->id = netbk->meta[npo.meta_cons].id;
+ resp->status = sco->meta_slots_used;
+
+ npo.meta_cons++;
+ sco->meta_slots_used--;
+ }
+
+
+ netif->stats.tx_bytes += skb->len;
+ netif->stats.tx_packets++;
+
+ status = netbk_check_gop(sco->meta_slots_used,
+ netif->domid, &npo);
+
+ if (sco->meta_slots_used == 1)
+ flags = 0;
+ else
+ flags = NETRXF_more_data;
+
+ if (skb->ip_summed == CHECKSUM_PARTIAL) /* local packet? */
+ flags |= NETRXF_csum_blank | NETRXF_data_validated;
+ else if (skb->ip_summed == CHECKSUM_UNNECESSARY)
+ /* remote but checksummed. */
+ flags |= NETRXF_data_validated;
+
+ offset = 0;
+ resp = make_rx_response(netif, netbk->meta[npo.meta_cons].id,
+ status, offset,
+ netbk->meta[npo.meta_cons].size,
+ flags);
+
+ if (netbk->meta[npo.meta_cons].gso_size && !netif->gso_prefix) {
+ struct xen_netif_extra_info *gso =
+ (struct xen_netif_extra_info *)
+ RING_GET_RESPONSE(&netif->rx,
+ netif->rx.rsp_prod_pvt++);
+
+ resp->flags |= NETRXF_extra_info;
+
+ gso->u.gso.size = netbk->meta[npo.meta_cons].gso_size;
+ gso->u.gso.type = XEN_NETIF_GSO_TYPE_TCPV4;
+ gso->u.gso.pad = 0;
+ gso->u.gso.features = 0;
+
+ gso->type = XEN_NETIF_EXTRA_TYPE_GSO;
+ gso->flags = 0;
+ }
+
+ netbk_add_frag_responses(netif, status,
+ netbk->meta + npo.meta_cons + 1,
+ sco->meta_slots_used);
+
+ RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&netif->rx, ret);
+ irq = netif->irq;
+ if (ret && !netbk->rx_notify[irq]) {
+ netbk->rx_notify[irq] = 1;
+ netbk->notify_list[notify_nr++] = irq;
+ }
+
+ if (netif_queue_stopped(netif->dev) &&
+ netif_schedulable(netif) &&
+ !netbk_queue_full(netif))
+ netif_wake_queue(netif->dev);
+
+ netif_put(netif);
+ npo.meta_cons += sco->meta_slots_used;
+ dev_kfree_skb(skb);
+ }
+
+ while (notify_nr != 0) {
+ irq = netbk->notify_list[--notify_nr];
+ netbk->rx_notify[irq] = 0;
+ notify_remote_via_irq(irq);
+ }
+
+ /* More work to do? */
+ if (!skb_queue_empty(&netbk->rx_queue) &&
+ !timer_pending(&netbk->net_timer))
+ xen_netbk_bh_handler(netbk, 1);
+}
+
+static void net_alarm(unsigned long data)
+{
+ struct xen_netbk *netbk = (struct xen_netbk *)data;
+ xen_netbk_bh_handler(netbk, 1);
+}
+
+static void netbk_tx_pending_timeout(unsigned long data)
+{
+ struct xen_netbk *netbk = (struct xen_netbk *)data;
+ xen_netbk_bh_handler(netbk, 0);
+}
+
+struct net_device_stats *netif_be_get_stats(struct net_device *dev)
+{
+ struct xen_netif *netif = netdev_priv(dev);
+ return &netif->stats;
+}
+
+static int __on_net_schedule_list(struct xen_netif *netif)
+{
+ return !list_empty(&netif->list);
+}
+
+/* Must be called with net_schedule_list_lock held */
+static void remove_from_net_schedule_list(struct xen_netif *netif)
+{
+ if (likely(__on_net_schedule_list(netif))) {
+ list_del_init(&netif->list);
+ netif_put(netif);
+ }
+}
+
+static struct xen_netif *poll_net_schedule_list(struct xen_netbk *netbk)
+{
+ struct xen_netif *netif = NULL;
+
+ spin_lock_irq(&netbk->net_schedule_list_lock);
+ if (list_empty(&netbk->net_schedule_list))
+ goto out;
+
+ netif = list_first_entry(&netbk->net_schedule_list,
+ struct xen_netif, list);
+ if (!netif)
+ goto out;
+
+ netif_get(netif);
+
+ remove_from_net_schedule_list(netif);
+out:
+ spin_unlock_irq(&netbk->net_schedule_list_lock);
+ return netif;
+}
+
+static void add_to_net_schedule_list_tail(struct xen_netif *netif)
+{
+ unsigned long flags;
+
+ struct xen_netbk *netbk = &xen_netbk[netif->group];
+ if (__on_net_schedule_list(netif))
+ return;
+
+ spin_lock_irqsave(&netbk->net_schedule_list_lock, flags);
+ if (!__on_net_schedule_list(netif) &&
+ likely(netif_schedulable(netif))) {
+ list_add_tail(&netif->list, &netbk->net_schedule_list);
+ netif_get(netif);
+ }
+ spin_unlock_irqrestore(&netbk->net_schedule_list_lock, flags);
+}
+
+void netif_schedule_work(struct xen_netif *netif)
+{
+ struct xen_netbk *netbk = &xen_netbk[netif->group];
+ int more_to_do;
+
+ RING_FINAL_CHECK_FOR_REQUESTS(&netif->tx, more_to_do);
+
+ if (more_to_do) {
+ add_to_net_schedule_list_tail(netif);
+ maybe_schedule_tx_action(netbk);
+ }
+}
+
+void netif_deschedule_work(struct xen_netif *netif)
+{
+ struct xen_netbk *netbk = &xen_netbk[netif->group];
+ spin_lock_irq(&netbk->net_schedule_list_lock);
+ remove_from_net_schedule_list(netif);
+ spin_unlock_irq(&netbk->net_schedule_list_lock);
+}
+
+
+static void tx_add_credit(struct xen_netif *netif)
+{
+ unsigned long max_burst, max_credit;
+
+ /*
+ * Allow a burst big enough to transmit a jumbo packet of up to 128kB.
+ * Otherwise the interface can seize up due to insufficient credit.
+ */
+ max_burst = RING_GET_REQUEST(&netif->tx, netif->tx.req_cons)->size;
+ max_burst = min(max_burst, 131072UL);
+ max_burst = max(max_burst, netif->credit_bytes);
+
+ /* Take care that adding a new chunk of credit doesn't wrap to zero. */
+ max_credit = netif->remaining_credit + netif->credit_bytes;
+ if (max_credit < netif->remaining_credit)
+ max_credit = ULONG_MAX; /* wrapped: clamp to ULONG_MAX */
+
+ netif->remaining_credit = min(max_credit, max_burst);
+}
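+
+/*
+ * Worked example with made-up numbers: credit_bytes = 10000,
+ * remaining_credit = 2000 and a 60000-byte request at the ring head give
+ * max_burst = max(min(60000, 131072), 10000) = 60000 and max_credit =
+ * 12000, so remaining_credit becomes min(12000, 60000) = 12000.
+ */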
+
+static void tx_credit_callback(unsigned long data)
+{
+ struct xen_netif *netif = (struct xen_netif *)data;
+ tx_add_credit(netif);
+ netif_schedule_work(netif);
+}
+
+static inline int copy_pending_req(struct xen_netbk *netbk,
+ pending_ring_idx_t pending_idx)
+{
+ return gnttab_copy_grant_page(
+ netbk->grant_tx_handle[pending_idx],
+ &netbk->mmap_pages[pending_idx]);
+}
+
+static inline void net_tx_action_dealloc(struct xen_netbk *netbk)
+{
+ struct netbk_tx_pending_inuse *inuse, *n;
+ struct gnttab_unmap_grant_ref *gop;
+ u16 pending_idx;
+ pending_ring_idx_t dc, dp;
+ struct xen_netif *netif;
+ int ret;
+ LIST_HEAD(list);
+
+ dc = netbk->dealloc_cons;
+ gop = netbk->tx_unmap_ops;
+
+ /* Free up any grants we have finished using. */
+ do {
+ dp = netbk->dealloc_prod;
+
+ /* Ensure we see all indices enqueued by netif_idx_release(). */
+ smp_rmb();
+
+ while (dc != dp) {
+ unsigned long pfn;
+ struct netbk_tx_pending_inuse *pending_inuse =
+ netbk->pending_inuse;
+
+ pending_idx = netbk->dealloc_ring[pending_index(dc++)];
+ list_move_tail(&pending_inuse[pending_idx].list, &list);
+
+ pfn = idx_to_pfn(netbk, pending_idx);
+ /* Already unmapped? */
+ if (!phys_to_machine_mapping_valid(pfn))
+ continue;
+
+ gnttab_set_unmap_op(gop,
+ idx_to_kaddr(netbk, pending_idx),
+ GNTMAP_host_map,
+ netbk->grant_tx_handle[pending_idx]);
+ gop++;
+ }
+
+ } while (dp != netbk->dealloc_prod);
+
+ netbk->dealloc_cons = dc;
+
+ ret = HYPERVISOR_grant_table_op(
+ GNTTABOP_unmap_grant_ref, netbk->tx_unmap_ops,
+ gop - netbk->tx_unmap_ops);
+ BUG_ON(ret);
+
+ /*
+ * Copy any entries that have been pending for too long
+ */
+ if (netbk_copy_skb_mode == NETBK_DELAYED_COPY_SKB &&
+ !list_empty(&netbk->pending_inuse_head)) {
+ list_for_each_entry_safe(inuse, n,
+ &netbk->pending_inuse_head, list) {
+ struct pending_tx_info *pending_tx_info;
+ pending_tx_info = netbk->pending_tx_info;
+
+ if (time_after(inuse->alloc_time + HZ / 2, jiffies))
+ break;
+
+ pending_idx = inuse - netbk->pending_inuse;
+
+ pending_tx_info[pending_idx].netif->nr_copied_skbs++;
+
+ switch (copy_pending_req(netbk, pending_idx)) {
+ case 0:
+ list_move_tail(&inuse->list, &list);
+ continue;
+ case -EBUSY:
+ list_del_init(&inuse->list);
+ continue;
+ case -ENOENT:
+ continue;
+ }
+
+ break;
+ }
+ }
+
+ list_for_each_entry_safe(inuse, n, &list, list) {
+ struct pending_tx_info *pending_tx_info;
+ pending_ring_idx_t index;
+
+ pending_tx_info = netbk->pending_tx_info;
+ pending_idx = inuse - netbk->pending_inuse;
+
+ netif = pending_tx_info[pending_idx].netif;
+
+ make_tx_response(netif, &pending_tx_info[pending_idx].req,
+ NETIF_RSP_OKAY);
+
+ /* Ready for next use. */
+ gnttab_reset_grant_page(netbk->mmap_pages[pending_idx]);
+
+ index = pending_index(netbk->pending_prod++);
+ netbk->pending_ring[index] = pending_idx;
+
+ netif_put(netif);
+
+ list_del_init(&inuse->list);
+ }
+}
+
+static void netbk_tx_err(struct xen_netif *netif,
+ struct xen_netif_tx_request *txp, RING_IDX end)
+{
+ RING_IDX cons = netif->tx.req_cons;
+
+ do {
+ make_tx_response(netif, txp, NETIF_RSP_ERROR);
+ if (cons >= end)
+ break;
+ txp = RING_GET_REQUEST(&netif->tx, cons++);
+ } while (1);
+ netif->tx.req_cons = cons;
+ netif_schedule_work(netif);
+ netif_put(netif);
+}
+
+static int netbk_count_requests(struct xen_netif *netif,
+ struct xen_netif_tx_request *first,
+ struct xen_netif_tx_request *txp,
+ int work_to_do)
+{
+ RING_IDX cons = netif->tx.req_cons;
+ int frags = 0;
+
+ if (!(first->flags & NETTXF_more_data))
+ return 0;
+
+ do {
+ if (frags >= work_to_do) {
+ pr_debug("Need more frags\n");
+ return -frags;
+ }
+
+ if (unlikely(frags >= MAX_SKB_FRAGS)) {
+ pr_debug("Too many frags\n");
+ return -frags;
+ }
+
+ memcpy(txp, RING_GET_REQUEST(&netif->tx, cons + frags),
+ sizeof(*txp));
+ if (txp->size > first->size) {
+ pr_debug("Frags galore\n");
+ return -frags;
+ }
+
+ first->size -= txp->size;
+ frags++;
+
+ if (unlikely((txp->offset + txp->size) > PAGE_SIZE)) {
+ pr_debug("txp->offset: %x, size: %u\n",
+ txp->offset, txp->size);
+ return -frags;
+ }
+ } while ((txp++)->flags & NETTXF_more_data);
+
+ return frags;
+}
+
+static struct gnttab_map_grant_ref *netbk_get_requests(struct xen_netbk *netbk,
+ struct xen_netif *netif,
+ struct sk_buff *skb,
+ struct xen_netif_tx_request *txp,
+ struct gnttab_map_grant_ref *mop)
+{
+ struct skb_shared_info *shinfo = skb_shinfo(skb);
+ skb_frag_t *frags = shinfo->frags;
+ unsigned long pending_idx = *((u16 *)skb->data);
+ int i, start;
+
+ /* Skip first skb fragment if it is on same page as header fragment. */
+ start = ((unsigned long)shinfo->frags[0].page == pending_idx);
+
+ for (i = start; i < shinfo->nr_frags; i++, txp++) {
+ pending_ring_idx_t index;
+ struct pending_tx_info *pending_tx_info =
+ netbk->pending_tx_info;
+
+ index = pending_index(netbk->pending_cons++);
+ pending_idx = netbk->pending_ring[index];
+
+ gnttab_set_map_op(mop++, idx_to_kaddr(netbk, pending_idx),
+ GNTMAP_host_map | GNTMAP_readonly,
+ txp->gref, netif->domid);
+
+ memcpy(&pending_tx_info[pending_idx].req, txp, sizeof(*txp));
+ netif_get(netif);
+ pending_tx_info[pending_idx].netif = netif;
+ frags[i].page = (void *)pending_idx;
+ }
+
+ return mop;
+}
+
+static int netbk_tx_check_mop(struct xen_netbk *netbk,
+ struct sk_buff *skb,
+ struct gnttab_map_grant_ref **mopp)
+{
+ struct gnttab_map_grant_ref *mop = *mopp;
+ int pending_idx = *((u16 *)skb->data);
+ struct pending_tx_info *pending_tx_info = netbk->pending_tx_info;
+ struct xen_netif *netif = pending_tx_info[pending_idx].netif;
+ struct xen_netif_tx_request *txp;
+ struct skb_shared_info *shinfo = skb_shinfo(skb);
+ int nr_frags = shinfo->nr_frags;
+ int i, err, start;
+
+ /* Check status of header. */
+ err = mop->status;
+ if (unlikely(err)) {
+ pending_ring_idx_t index;
+ index = pending_index(netbk->pending_prod++);
+ txp = &pending_tx_info[pending_idx].req;
+ make_tx_response(netif, txp, NETIF_RSP_ERROR);
+ netbk->pending_ring[index] = pending_idx;
+ netif_put(netif);
+ } else {
+ set_phys_to_machine(
+ __pa(idx_to_kaddr(netbk, pending_idx)) >> PAGE_SHIFT,
+ FOREIGN_FRAME(mop->dev_bus_addr >> PAGE_SHIFT));
+ netbk->grant_tx_handle[pending_idx] = mop->handle;
+ }
+
+ /* Skip first skb fragment if it is on same page as header fragment. */
+ start = ((unsigned long)shinfo->frags[0].page == pending_idx);
+
+ for (i = start; i < nr_frags; i++) {
+ int j, newerr;
+ pending_ring_idx_t index;
+
+ pending_idx = (unsigned long)shinfo->frags[i].page;
+
+ /* Check error status: if okay then remember grant handle. */
+ newerr = (++mop)->status;
+ if (likely(!newerr)) {
+ unsigned long addr;
+ addr = idx_to_kaddr(netbk, pending_idx);
+ set_phys_to_machine(
+ __pa(addr)>>PAGE_SHIFT,
+ FOREIGN_FRAME(mop->dev_bus_addr>>PAGE_SHIFT));
+ netbk->grant_tx_handle[pending_idx] = mop->handle;
+ /* Had a previous error? Invalidate this fragment. */
+ if (unlikely(err))
+ netif_idx_release(netbk, pending_idx);
+ continue;
+ }
+
+ /* Error on this fragment: respond to client with an error. */
+ txp = &netbk->pending_tx_info[pending_idx].req;
+ make_tx_response(netif, txp, NETIF_RSP_ERROR);
+ index = pending_index(netbk->pending_prod++);
+ netbk->pending_ring[index] = pending_idx;
+ netif_put(netif);
+
+ /* Not the first error? Preceding frags already invalidated. */
+ if (err)
+ continue;
+
+ /* First error: invalidate header and preceding fragments. */
+ pending_idx = *((u16 *)skb->data);
+ netif_idx_release(netbk, pending_idx);
+ for (j = start; j < i; j++) {
+			pending_idx = (unsigned long)shinfo->frags[j].page;
+ netif_idx_release(netbk, pending_idx);
+ }
+
+ /* Remember the error: invalidate all subsequent fragments. */
+ err = newerr;
+ }
+
+ *mopp = mop + 1;
+ return err;
+}
+
+static void netbk_fill_frags(struct xen_netbk *netbk, struct sk_buff *skb)
+{
+ struct skb_shared_info *shinfo = skb_shinfo(skb);
+ int nr_frags = shinfo->nr_frags;
+ int i;
+
+ for (i = 0; i < nr_frags; i++) {
+ skb_frag_t *frag = shinfo->frags + i;
+ struct xen_netif_tx_request *txp;
+ unsigned long pending_idx;
+
+ pending_idx = (unsigned long)frag->page;
+
+ netbk->pending_inuse[pending_idx].alloc_time = jiffies;
+ list_add_tail(&netbk->pending_inuse[pending_idx].list,
+ &netbk->pending_inuse_head);
+
+ txp = &netbk->pending_tx_info[pending_idx].req;
+ frag->page = virt_to_page(idx_to_kaddr(netbk, pending_idx));
+ frag->size = txp->size;
+ frag->page_offset = txp->offset;
+
+ skb->len += txp->size;
+ skb->data_len += txp->size;
+ skb->truesize += txp->size;
+ }
+}
+
+int netbk_get_extras(struct xen_netif *netif,
+ struct xen_netif_extra_info *extras,
+ int work_to_do)
+{
+ struct xen_netif_extra_info extra;
+ RING_IDX cons = netif->tx.req_cons;
+
+ do {
+ if (unlikely(work_to_do-- <= 0)) {
+ pr_debug("Missing extra info\n");
+ return -EBADR;
+ }
+
+ memcpy(&extra, RING_GET_REQUEST(&netif->tx, cons),
+ sizeof(extra));
+ if (unlikely(!extra.type ||
+ extra.type >= XEN_NETIF_EXTRA_TYPE_MAX)) {
+ netif->tx.req_cons = ++cons;
+ pr_debug("Invalid extra type: %d\n", extra.type);
+ return -EINVAL;
+ }
+
+ memcpy(&extras[extra.type - 1], &extra, sizeof(extra));
+ netif->tx.req_cons = ++cons;
+ } while (extra.flags & XEN_NETIF_EXTRA_FLAG_MORE);
+
+ return work_to_do;
+}
+
+static int netbk_set_skb_gso(struct sk_buff *skb,
+ struct xen_netif_extra_info *gso)
+{
+ if (!gso->u.gso.size) {
+ pr_debug("GSO size must not be zero.\n");
+ return -EINVAL;
+ }
+
+ /* Currently only TCPv4 S.O. is supported. */
+ if (gso->u.gso.type != XEN_NETIF_GSO_TYPE_TCPV4) {
+ pr_debug("Bad GSO type %d.\n", gso->u.gso.type);
+ return -EINVAL;
+ }
+
+ skb_shinfo(skb)->gso_size = gso->u.gso.size;
+ skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
+
+ /* Header must be checked, and gso_segs computed. */
+ skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
+ skb_shinfo(skb)->gso_segs = 0;
+
+ return 0;
+}
+
+static int checksum_setup(struct xen_netif *netif, struct sk_buff *skb)
+{
+ struct iphdr *iph;
+ unsigned char *th;
+ int err = -EPROTO;
+ int recalculate_partial_csum = 0;
+
+ /*
+ * A GSO SKB must be CHECKSUM_PARTIAL. However some buggy
+ * peers can fail to set NETRXF_csum_blank when sending a GSO
+ * frame. In this case force the SKB to CHECKSUM_PARTIAL and
+ * recalculate the partial checksum.
+ */
+ if (skb->ip_summed != CHECKSUM_PARTIAL && skb_is_gso(skb)) {
+ netif->rx_gso_checksum_fixup++;
+ skb->ip_summed = CHECKSUM_PARTIAL;
+ recalculate_partial_csum = 1;
+ }
+
+ /* A non-CHECKSUM_PARTIAL SKB does not require setup. */
+ if (skb->ip_summed != CHECKSUM_PARTIAL)
+ return 0;
+
+ if (skb->protocol != htons(ETH_P_IP))
+ goto out;
+
+ iph = (void *)skb->data;
+ th = skb->data + 4 * iph->ihl;
+ if (th >= skb_tail_pointer(skb))
+ goto out;
+
+ skb->csum_start = th - skb->head;
+ switch (iph->protocol) {
+ case IPPROTO_TCP:
+ skb->csum_offset = offsetof(struct tcphdr, check);
+
+ if (recalculate_partial_csum) {
+ struct tcphdr *tcph = (struct tcphdr *)th;
+ tcph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr,
+ skb->len - iph->ihl*4,
+ IPPROTO_TCP, 0);
+ }
+ break;
+ case IPPROTO_UDP:
+ skb->csum_offset = offsetof(struct udphdr, check);
+
+ if (recalculate_partial_csum) {
+ struct udphdr *udph = (struct udphdr *)th;
+ udph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr,
+ skb->len - iph->ihl*4,
+ IPPROTO_UDP, 0);
+ }
+ break;
+ default:
+ if (net_ratelimit())
+ printk(KERN_ERR "Attempting to checksum a non-"
+ "TCP/UDP packet, dropping a protocol"
+			       " %d packet\n", iph->protocol);
+ goto out;
+ }
+
+ if ((th + skb->csum_offset + 2) > skb_tail_pointer(skb))
+ goto out;
+
+ err = 0;
+
+out:
+ return err;
+}
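+
+/*
+ * As an example of the fields set above: for a CHECKSUM_PARTIAL TCP packet
+ * with a 20-byte IP header, csum_start points at the start of the TCP
+ * header and csum_offset is 16, the offset of tcphdr->check.
+ */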
+
+static bool tx_credit_exceeded(struct xen_netif *netif, unsigned size)
+{
+ unsigned long now = jiffies;
+ unsigned long next_credit =
+ netif->credit_timeout.expires +
+ msecs_to_jiffies(netif->credit_usec / 1000);
+
+ /* Timer could already be pending in rare cases. */
+ if (timer_pending(&netif->credit_timeout))
+ return true;
+
+ /* Passed the point where we can replenish credit? */
+ if (time_after_eq(now, next_credit)) {
+ netif->credit_timeout.expires = now;
+ tx_add_credit(netif);
+ }
+
+ /* Still too big to send right now? Set a callback. */
+ if (size > netif->remaining_credit) {
+ netif->credit_timeout.data =
+ (unsigned long)netif;
+ netif->credit_timeout.function =
+ tx_credit_callback;
+ mod_timer(&netif->credit_timeout,
+ next_credit);
+
+ return true;
+ }
+
+ return false;
+}
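+
+/*
+ * Example with hypothetical settings: credit_bytes = 1000000 and
+ * credit_usec = 500000 allow roughly 1MB of transmit credit per 500ms
+ * window; once the window has elapsed the credit is replenished on the
+ * spot, otherwise an oversized request arms the timer for next_credit.
+ */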
+
+static unsigned net_tx_build_mops(struct xen_netbk *netbk)
+{
+ struct gnttab_map_grant_ref *mop;
+ struct sk_buff *skb;
+ int ret;
+
+ mop = netbk->tx_map_ops;
+ while (((nr_pending_reqs(netbk) + MAX_SKB_FRAGS) < MAX_PENDING_REQS) &&
+ !list_empty(&netbk->net_schedule_list)) {
+ struct xen_netif *netif;
+ struct xen_netif_tx_request txreq;
+ struct xen_netif_tx_request txfrags[MAX_SKB_FRAGS];
+ struct xen_netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX-1];
+ u16 pending_idx;
+ RING_IDX idx;
+ int work_to_do;
+ unsigned int data_len;
+ pending_ring_idx_t index;
+
+ /* Get a netif from the list with work to do. */
+ netif = poll_net_schedule_list(netbk);
+ if (!netif)
+ continue;
+
+ RING_FINAL_CHECK_FOR_REQUESTS(&netif->tx, work_to_do);
+ if (!work_to_do) {
+ netif_put(netif);
+ continue;
+ }
+
+ idx = netif->tx.req_cons;
+ rmb(); /* Ensure that we see the request before we copy it. */
+ memcpy(&txreq, RING_GET_REQUEST(&netif->tx, idx), sizeof(txreq));
+
+ /* Credit-based scheduling. */
+ if (txreq.size > netif->remaining_credit &&
+ tx_credit_exceeded(netif, txreq.size)) {
+ netif_put(netif);
+ continue;
+ }
+
+ netif->remaining_credit -= txreq.size;
+
+ work_to_do--;
+ netif->tx.req_cons = ++idx;
+
+ memset(extras, 0, sizeof(extras));
+ if (txreq.flags & NETTXF_extra_info) {
+ work_to_do = netbk_get_extras(netif, extras,
+ work_to_do);
+ idx = netif->tx.req_cons;
+ if (unlikely(work_to_do < 0)) {
+ netbk_tx_err(netif, &txreq, idx);
+ continue;
+ }
+ }
+
+ ret = netbk_count_requests(netif, &txreq, txfrags, work_to_do);
+ if (unlikely(ret < 0)) {
+ netbk_tx_err(netif, &txreq, idx - ret);
+ continue;
+ }
+ idx += ret;
+
+ if (unlikely(txreq.size < ETH_HLEN)) {
+ pr_debug("Bad packet size: %d\n", txreq.size);
+ netbk_tx_err(netif, &txreq, idx);
+ continue;
+ }
+
+ /* No crossing a page as the payload mustn't fragment. */
+ if (unlikely((txreq.offset + txreq.size) > PAGE_SIZE)) {
+ pr_debug("txreq.offset: %x, size: %u, end: %lu\n",
+ txreq.offset, txreq.size,
+ (txreq.offset&~PAGE_MASK) + txreq.size);
+ netbk_tx_err(netif, &txreq, idx);
+ continue;
+ }
+
+ index = pending_index(netbk->pending_cons);
+ pending_idx = netbk->pending_ring[index];
+
+ data_len = (txreq.size > PKT_PROT_LEN &&
+ ret < MAX_SKB_FRAGS) ?
+ PKT_PROT_LEN : txreq.size;
+
+ skb = alloc_skb(data_len + NET_SKB_PAD + NET_IP_ALIGN,
+ GFP_ATOMIC | __GFP_NOWARN);
+ if (unlikely(skb == NULL)) {
+ pr_debug("Can't allocate a skb in start_xmit.\n");
+ netbk_tx_err(netif, &txreq, idx);
+ break;
+ }
+
+ /* Packets passed to netif_rx() must have some headroom. */
+ skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN);
+
+ if (extras[XEN_NETIF_EXTRA_TYPE_GSO - 1].type) {
+ struct xen_netif_extra_info *gso;
+ gso = &extras[XEN_NETIF_EXTRA_TYPE_GSO - 1];
+
+ if (netbk_set_skb_gso(skb, gso)) {
+ kfree_skb(skb);
+ netbk_tx_err(netif, &txreq, idx);
+ continue;
+ }
+ }
+
+ gnttab_set_map_op(mop, idx_to_kaddr(netbk, pending_idx),
+ GNTMAP_host_map | GNTMAP_readonly,
+ txreq.gref, netif->domid);
+ mop++;
+
+ memcpy(&netbk->pending_tx_info[pending_idx].req,
+ &txreq, sizeof(txreq));
+ netbk->pending_tx_info[pending_idx].netif = netif;
+ *((u16 *)skb->data) = pending_idx;
+
+ __skb_put(skb, data_len);
+
+ skb_shinfo(skb)->nr_frags = ret;
+ if (data_len < txreq.size) {
+ skb_shinfo(skb)->nr_frags++;
+ skb_shinfo(skb)->frags[0].page =
+ (void *)(unsigned long)pending_idx;
+ } else {
+ /* Discriminate from any valid pending_idx value. */
+ skb_shinfo(skb)->frags[0].page = (void *)~0UL;
+ }
+
+ __skb_queue_tail(&netbk->tx_queue, skb);
+
+ netbk->pending_cons++;
+
+ mop = netbk_get_requests(netbk, netif, skb, txfrags, mop);
+
+ netif->tx.req_cons = idx;
+ netif_schedule_work(netif);
+
+ if ((mop - netbk->tx_map_ops) >= ARRAY_SIZE(netbk->tx_map_ops))
+ break;
+ }
+
+ return mop - netbk->tx_map_ops;
+}
+
+static void net_tx_submit(struct xen_netbk *netbk)
+{
+ struct gnttab_map_grant_ref *mop;
+ struct sk_buff *skb;
+
+ mop = netbk->tx_map_ops;
+ while ((skb = __skb_dequeue(&netbk->tx_queue)) != NULL) {
+ struct xen_netif_tx_request *txp;
+ struct xen_netif *netif;
+ u16 pending_idx;
+ unsigned data_len;
+
+ pending_idx = *((u16 *)skb->data);
+ netif = netbk->pending_tx_info[pending_idx].netif;
+ txp = &netbk->pending_tx_info[pending_idx].req;
+
+ /* Check the remap error code. */
+ if (unlikely(netbk_tx_check_mop(netbk, skb, &mop))) {
+ pr_debug("netback grant failed.\n");
+ skb_shinfo(skb)->nr_frags = 0;
+ kfree_skb(skb);
+ continue;
+ }
+
+ data_len = skb->len;
+ memcpy(skb->data,
+ (void *)(idx_to_kaddr(netbk, pending_idx)|txp->offset),
+ data_len);
+ if (data_len < txp->size) {
+ /* Append the packet payload as a fragment. */
+ txp->offset += data_len;
+ txp->size -= data_len;
+ } else {
+ /* Schedule a response immediately. */
+ netif_idx_release(netbk, pending_idx);
+ }
+
+ if (txp->flags & NETTXF_csum_blank)
+ skb->ip_summed = CHECKSUM_PARTIAL;
+ else if (txp->flags & NETTXF_data_validated)
+ skb->ip_summed = CHECKSUM_UNNECESSARY;
+
+ netbk_fill_frags(netbk, skb);
+
+ /*
+ * If the initial fragment was < PKT_PROT_LEN then
+ * pull through some bytes from the other fragments to
+ * increase the linear region to PKT_PROT_LEN bytes.
+ */
+ if (skb_headlen(skb) < PKT_PROT_LEN && skb_is_nonlinear(skb)) {
+ int target = min_t(int, skb->len, PKT_PROT_LEN);
+ __pskb_pull_tail(skb, target - skb_headlen(skb));
+ }
+
+ skb->dev = netif->dev;
+ skb->protocol = eth_type_trans(skb, skb->dev);
+
+ if (checksum_setup(netif, skb)) {
+ pr_debug("Can't setup checksum in net_tx_action\n");
+ kfree_skb(skb);
+ continue;
+ }
+
+ if (unlikely(netbk_copy_skb_mode == NETBK_ALWAYS_COPY_SKB) &&
+ unlikely(skb_linearize(skb))) {
+ pr_debug("Can't linearize skb in net_tx_action.\n");
+ kfree_skb(skb);
+ continue;
+ }
+
+ netif->stats.rx_bytes += skb->len;
+ netif->stats.rx_packets++;
+
+ netif_rx_ni(skb);
+ netif->dev->last_rx = jiffies;
+ }
+}
+
+/* Called after netfront has transmitted */
+static void net_tx_action(unsigned long data)
+{
+ struct xen_netbk *netbk = (struct xen_netbk *)data;
+ unsigned nr_mops;
+ int ret;
+
+ net_tx_action_dealloc(netbk);
+
+ nr_mops = net_tx_build_mops(netbk);
+
+ if (nr_mops == 0)
+ goto out;
+
+ ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref,
+ netbk->tx_map_ops, nr_mops);
+ BUG_ON(ret);
+
+ net_tx_submit(netbk);
+out:
+ if (netbk_copy_skb_mode == NETBK_DELAYED_COPY_SKB &&
+ !list_empty(&netbk->pending_inuse_head)) {
+ struct netbk_tx_pending_inuse *oldest;
+
+ oldest = list_entry(netbk->pending_inuse_head.next,
+ struct netbk_tx_pending_inuse, list);
+ mod_timer(&netbk->netbk_tx_pending_timer,
+ oldest->alloc_time + HZ);
+ }
+}
+
+static void netif_idx_release(struct xen_netbk *netbk, u16 pending_idx)
+{
+ static DEFINE_SPINLOCK(_lock);
+ unsigned long flags;
+ pending_ring_idx_t index;
+
+ spin_lock_irqsave(&_lock, flags);
+ index = pending_index(netbk->dealloc_prod);
+ netbk->dealloc_ring[index] = pending_idx;
+ /* Sync with net_tx_action_dealloc: insert idx /then/ incr producer. */
+ smp_wmb();
+ netbk->dealloc_prod++;
+ spin_unlock_irqrestore(&_lock, flags);
+
+ xen_netbk_bh_handler(netbk, 0);
+}
+
+static void netif_page_release(struct page *page, unsigned int order)
+{
+ unsigned int group, idx;
+ int foreign = netif_get_page_ext(page, &group, &idx);
+
+ BUG_ON(!foreign);
+ BUG_ON(order);
+
+ netif_idx_release(&xen_netbk[group], idx);
+}
+
+irqreturn_t netif_be_int(int irq, void *dev_id)
+{
+ struct xen_netif *netif = dev_id;
+ struct xen_netbk *netbk;
+
+ if (netif->group == -1)
+ return IRQ_NONE;
+
+ netbk = &xen_netbk[netif->group];
+
+ add_to_net_schedule_list_tail(netif);
+ maybe_schedule_tx_action(netbk);
+
+ if (netif_schedulable(netif) && !netbk_queue_full(netif))
+ netif_wake_queue(netif->dev);
+
+ return IRQ_HANDLED;
+}
+
+static void make_tx_response(struct xen_netif *netif,
+ struct xen_netif_tx_request *txp,
+ s8 st)
+{
+ RING_IDX i = netif->tx.rsp_prod_pvt;
+ struct xen_netif_tx_response *resp;
+ int notify;
+
+ resp = RING_GET_RESPONSE(&netif->tx, i);
+ resp->id = txp->id;
+ resp->status = st;
+
+ if (txp->flags & NETTXF_extra_info)
+ RING_GET_RESPONSE(&netif->tx, ++i)->status = NETIF_RSP_NULL;
+
+ netif->tx.rsp_prod_pvt = ++i;
+ RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&netif->tx, notify);
+ if (notify)
+ notify_remote_via_irq(netif->irq);
+}
+
+static struct xen_netif_rx_response *make_rx_response(struct xen_netif *netif,
+ u16 id,
+ s8 st,
+ u16 offset,
+ u16 size,
+ u16 flags)
+{
+ RING_IDX i = netif->rx.rsp_prod_pvt;
+ struct xen_netif_rx_response *resp;
+
+ resp = RING_GET_RESPONSE(&netif->rx, i);
+ resp->offset = offset;
+ resp->flags = flags;
+ resp->id = id;
+ resp->status = (s16)size;
+ if (st < 0)
+ resp->status = (s16)st;
+
+ netif->rx.rsp_prod_pvt = ++i;
+
+ return resp;
+}
+
+#ifdef NETBE_DEBUG_INTERRUPT
+static irqreturn_t netif_be_dbg(int irq, void *dev_id, struct pt_regs *regs)
+{
+ struct list_head *ent;
+ struct xen_netif *netif;
+ int i = 0;
+ int group = 0;
+
+ printk(KERN_ALERT "netif_schedule_list:\n");
+
+ for (group = 0; group < xen_netbk_group_nr; group++) {
+ struct xen_netbk *netbk = &xen_netbk[group];
+ spin_lock_irq(&netbk->net_schedule_list_lock);
+ printk(KERN_ALERT "xen_netback group number: %d\n", group);
+ list_for_each(ent, &netbk->net_schedule_list) {
+ netif = list_entry(ent, struct xen_netif, list);
+ printk(KERN_ALERT " %d: private(rx_req_cons=%08x "
+ "rx_resp_prod=%08x\n",
+ i, netif->rx.req_cons, netif->rx.rsp_prod_pvt);
+ printk(KERN_ALERT
+ " tx_req_cons=%08x, tx_resp_prod=%08x)\n",
+ netif->tx.req_cons, netif->tx.rsp_prod_pvt);
+ printk(KERN_ALERT
+ " shared(rx_req_prod=%08x "
+ "rx_resp_prod=%08x\n",
+ netif->rx.sring->req_prod,
+ netif->rx.sring->rsp_prod);
+ printk(KERN_ALERT
+ " rx_event=%08x, tx_req_prod=%08x\n",
+ netif->rx.sring->rsp_event,
+ netif->tx.sring->req_prod);
+ printk(KERN_ALERT
+ " tx_resp_prod=%08x, tx_event=%08x)\n",
+ netif->tx.sring->rsp_prod,
+ netif->tx.sring->rsp_event);
+ i++;
+ }
+ spin_unlock_irq(&netbk->net_schedule_list_lock);
+ }
+
+ printk(KERN_ALERT " ** End of netif_schedule_list **\n");
+
+ return IRQ_HANDLED;
+}
+#endif
+
+static inline int rx_work_todo(struct xen_netbk *netbk)
+{
+ return !skb_queue_empty(&netbk->rx_queue);
+}
+
+static inline int tx_work_todo(struct xen_netbk *netbk)
+{
+ if (netbk->dealloc_cons != netbk->dealloc_prod)
+ return 1;
+
+ if (netbk_copy_skb_mode == NETBK_DELAYED_COPY_SKB &&
+ !list_empty(&netbk->pending_inuse_head))
+ return 1;
+
+ if (((nr_pending_reqs(netbk) + MAX_SKB_FRAGS) < MAX_PENDING_REQS) &&
+ !list_empty(&netbk->net_schedule_list))
+ return 1;
+
+ return 0;
+}
+
+static int netbk_action_thread(void *data)
+{
+ struct xen_netbk *netbk = (struct xen_netbk *)data;
+ while (!kthread_should_stop()) {
+ wait_event_interruptible(netbk->kthread.netbk_action_wq,
+ rx_work_todo(netbk)
+ || tx_work_todo(netbk)
+ || kthread_should_stop());
+ cond_resched();
+
+ if (kthread_should_stop())
+ break;
+
+ if (rx_work_todo(netbk))
+ net_rx_action((unsigned long)netbk);
+
+ if (tx_work_todo(netbk))
+ net_tx_action((unsigned long)netbk);
+ }
+
+ return 0;
+}
+
+static int __init netback_init(void)
+{
+ int i;
+ struct page *page;
+ int rc = 0;
+ int group;
+
+ if (!xen_pv_domain())
+ return -ENODEV;
+
+ xen_netbk_group_nr = num_online_cpus();
+ xen_netbk = vmalloc(sizeof(struct xen_netbk) * xen_netbk_group_nr);
+ if (!xen_netbk) {
+ printk(KERN_ALERT "%s: out of memory\n", __func__);
+ return -ENOMEM;
+ }
+ memset(xen_netbk, 0, sizeof(struct xen_netbk) * xen_netbk_group_nr);
+
+ for (group = 0; group < xen_netbk_group_nr; group++) {
+ struct xen_netbk *netbk = &xen_netbk[group];
+ skb_queue_head_init(&netbk->rx_queue);
+ skb_queue_head_init(&netbk->tx_queue);
+
+ init_timer(&netbk->net_timer);
+ netbk->net_timer.data = (unsigned long)netbk;
+ netbk->net_timer.function = net_alarm;
+
+ init_timer(&netbk->netbk_tx_pending_timer);
+ netbk->netbk_tx_pending_timer.data = (unsigned long)netbk;
+ netbk->netbk_tx_pending_timer.function =
+ netbk_tx_pending_timeout;
+
+ netbk->mmap_pages =
+ alloc_empty_pages_and_pagevec(MAX_PENDING_REQS);
+ if (!netbk->mmap_pages) {
+ printk(KERN_ALERT "%s: out of memory\n", __func__);
+ del_timer(&netbk->netbk_tx_pending_timer);
+ del_timer(&netbk->net_timer);
+ rc = -ENOMEM;
+ goto failed_init;
+ }
+
+ for (i = 0; i < MAX_PENDING_REQS; i++) {
+ page = netbk->mmap_pages[i];
+ SetPageForeign(page, netif_page_release);
+ netif_set_page_ext(page, group, i);
+ INIT_LIST_HEAD(&netbk->pending_inuse[i].list);
+ }
+
+ netbk->pending_cons = 0;
+ netbk->pending_prod = MAX_PENDING_REQS;
+ for (i = 0; i < MAX_PENDING_REQS; i++)
+ netbk->pending_ring[i] = i;
+
+ if (MODPARM_netback_kthread) {
+ init_waitqueue_head(&netbk->kthread.netbk_action_wq);
+ netbk->kthread.task =
+ kthread_create(netbk_action_thread,
+ (void *)netbk,
+ "netback/%u", group);
+
+ if (!IS_ERR(netbk->kthread.task)) {
+ kthread_bind(netbk->kthread.task, group);
+ } else {
+ printk(KERN_ALERT
+ "kthread_run() fails at netback\n");
+ free_empty_pages_and_pagevec(netbk->mmap_pages,
+ MAX_PENDING_REQS);
+ del_timer(&netbk->netbk_tx_pending_timer);
+ del_timer(&netbk->net_timer);
+ rc = PTR_ERR(netbk->kthread.task);
+ goto failed_init;
+ }
+ } else {
+ tasklet_init(&netbk->tasklet.net_tx_tasklet,
+ net_tx_action,
+ (unsigned long)netbk);
+ tasklet_init(&netbk->tasklet.net_rx_tasklet,
+ net_rx_action,
+ (unsigned long)netbk);
+ }
+
+ INIT_LIST_HEAD(&netbk->pending_inuse_head);
+ INIT_LIST_HEAD(&netbk->net_schedule_list);
+
+ spin_lock_init(&netbk->net_schedule_list_lock);
+
+ atomic_set(&netbk->netfront_count, 0);
+
+ if (MODPARM_netback_kthread)
+ wake_up_process(netbk->kthread.task);
+ }
+
+ netbk_copy_skb_mode = NETBK_DONT_COPY_SKB;
+ if (MODPARM_copy_skb) {
+ if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_and_replace,
+ NULL, 0))
+ netbk_copy_skb_mode = NETBK_ALWAYS_COPY_SKB;
+ else
+ netbk_copy_skb_mode = NETBK_DELAYED_COPY_SKB;
+ }
+
+ rc = netif_xenbus_init();
+ if (rc)
+ goto failed_init;
+
+#ifdef NETBE_DEBUG_INTERRUPT
+ (void)bind_virq_to_irqhandler(VIRQ_DEBUG,
+ 0,
+ netif_be_dbg,
+ IRQF_SHARED,
+ "net-be-dbg",
+ &netif_be_dbg);
+#endif
+
+ return 0;
+
+failed_init:
+ for (i = 0; i < group; i++) {
+ struct xen_netbk *netbk = &xen_netbk[i];
+ free_empty_pages_and_pagevec(netbk->mmap_pages,
+ MAX_PENDING_REQS);
+ del_timer(&netbk->netbk_tx_pending_timer);
+ del_timer(&netbk->net_timer);
+ if (MODPARM_netback_kthread)
+ kthread_stop(netbk->kthread.task);
+ }
+ vfree(xen_netbk);
+ return rc;
+
+}
+
+module_init(netback_init);
+
+MODULE_LICENSE("Dual BSD/GPL");
diff --git a/drivers/net/xen-netback/xenbus.c b/drivers/net/xen-netback/xenbus.c
new file mode 100644
index 0000000..867dc25
--- /dev/null
+++ b/drivers/net/xen-netback/xenbus.c
@@ -0,0 +1,489 @@
+/*
+ * Xenbus code for netif backend
+ *
+ * Copyright (C) 2005 Rusty Russell <rusty@rustcorp.com.au>
+ * Copyright (C) 2005 XenSource Ltd
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+*/
+
+#include "common.h"
+
+static int connect_rings(struct backend_info *);
+static void connect(struct backend_info *);
+static void backend_create_netif(struct backend_info *be);
+static void unregister_hotplug_status_watch(struct backend_info *be);
+
+static int netback_remove(struct xenbus_device *dev)
+{
+ struct backend_info *be = dev_get_drvdata(&dev->dev);
+
+ unregister_hotplug_status_watch(be);
+ if (be->netif) {
+ kobject_uevent(&dev->dev.kobj, KOBJ_OFFLINE);
+ xenbus_rm(XBT_NIL, dev->nodename, "hotplug-status");
+ netif_disconnect(be->netif);
+ be->netif = NULL;
+ }
+ kfree(be);
+ dev_set_drvdata(&dev->dev, NULL);
+ return 0;
+}
+
+
+/**
+ * Entry point to this code when a new device is created. Allocate the basic
+ * structures and switch to InitWait.
+ */
+static int netback_probe(struct xenbus_device *dev,
+ const struct xenbus_device_id *id)
+{
+ const char *message;
+ struct xenbus_transaction xbt;
+ int err;
+ int sg;
+ struct backend_info *be = kzalloc(sizeof(struct backend_info),
+ GFP_KERNEL);
+ if (!be) {
+ xenbus_dev_fatal(dev, -ENOMEM,
+ "allocating backend structure");
+ return -ENOMEM;
+ }
+
+ be->dev = dev;
+ dev_set_drvdata(&dev->dev, be);
+
+ sg = 1;
+ if (netbk_copy_skb_mode == NETBK_ALWAYS_COPY_SKB)
+ sg = 0;
+
+ do {
+ err = xenbus_transaction_start(&xbt);
+ if (err) {
+ xenbus_dev_fatal(dev, err, "starting transaction");
+ goto fail;
+ }
+
+ err = xenbus_printf(xbt, dev->nodename, "feature-sg", "%d", sg);
+ if (err) {
+ message = "writing feature-sg";
+ goto abort_transaction;
+ }
+
+ err = xenbus_printf(xbt, dev->nodename, "feature-gso-tcpv4",
+ "%d", sg);
+ if (err) {
+ message = "writing feature-gso-tcpv4";
+ goto abort_transaction;
+ }
+
+ /* We support rx-copy path. */
+ err = xenbus_printf(xbt, dev->nodename,
+ "feature-rx-copy", "%d", 1);
+ if (err) {
+ message = "writing feature-rx-copy";
+ goto abort_transaction;
+ }
+
+ /*
+ * We don't support rx-flip path (except old guests who don't
+ * grok this feature flag).
+ */
+ err = xenbus_printf(xbt, dev->nodename,
+ "feature-rx-flip", "%d", 0);
+ if (err) {
+ message = "writing feature-rx-flip";
+ goto abort_transaction;
+ }
+
+ err = xenbus_transaction_end(xbt, 0);
+ } while (err == -EAGAIN);
+
+ if (err) {
+ xenbus_dev_fatal(dev, err, "completing transaction");
+ goto fail;
+ }
+
+ err = xenbus_switch_state(dev, XenbusStateInitWait);
+ if (err)
+ goto fail;
+
+ /* This kicks hotplug scripts, so do it immediately. */
+ backend_create_netif(be);
+
+ return 0;
+
+abort_transaction:
+ xenbus_transaction_end(xbt, 1);
+ xenbus_dev_fatal(dev, err, "%s", message);
+fail:
+	pr_debug("failed\n");
+ netback_remove(dev);
+ return err;
+}
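+
+/*
+ * After a successful probe the backend's xenbus directory therefore
+ * advertises feature-sg = 1, feature-gso-tcpv4 = 1, feature-rx-copy = 1
+ * and feature-rx-flip = 0 (with sg and gso-tcpv4 written as 0 in
+ * always-copy mode).
+ */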
+
+
+/*
+ * Handle the creation of the hotplug script environment. We add the script
+ * and vif variables to the environment, for the benefit of the vif-* hotplug
+ * scripts.
+ */
+static int netback_uevent(struct xenbus_device *xdev,
+ struct kobj_uevent_env *env)
+{
+ struct backend_info *be = dev_get_drvdata(&xdev->dev);
+ char *val;
+
+ val = xenbus_read(XBT_NIL, xdev->nodename, "script", NULL);
+ if (IS_ERR(val)) {
+ int err = PTR_ERR(val);
+ xenbus_dev_fatal(xdev, err, "reading script");
+ return err;
+ } else {
+ if (add_uevent_var(env, "script=%s", val)) {
+ kfree(val);
+ return -ENOMEM;
+ }
+ kfree(val);
+ }
+
+ if (!be || !be->netif)
+ return 0;
+
+ return add_uevent_var(env, "vif=%s", be->netif->dev->name);
+}
+
+
+static void backend_create_netif(struct backend_info *be)
+{
+ int err;
+ long handle;
+ struct xenbus_device *dev = be->dev;
+
+ if (be->netif != NULL)
+ return;
+
+ err = xenbus_scanf(XBT_NIL, dev->nodename, "handle", "%li", &handle);
+ if (err != 1) {
+ xenbus_dev_fatal(dev, err, "reading handle");
+ return;
+ }
+
+ be->netif = netif_alloc(&dev->dev, dev->otherend_id, handle);
+ if (IS_ERR(be->netif)) {
+ err = PTR_ERR(be->netif);
+ be->netif = NULL;
+ xenbus_dev_fatal(dev, err, "creating interface");
+ return;
+ }
+
+ kobject_uevent(&dev->dev.kobj, KOBJ_ONLINE);
+}
+
+
+static void disconnect_backend(struct xenbus_device *dev)
+{
+ struct backend_info *be = dev_get_drvdata(&dev->dev);
+
+ if (be->netif) {
+ xenbus_rm(XBT_NIL, dev->nodename, "hotplug-status");
+ netif_disconnect(be->netif);
+ be->netif = NULL;
+ }
+}
+
+/**
+ * Callback received when the frontend's state changes.
+ */
+static void frontend_changed(struct xenbus_device *dev,
+ enum xenbus_state frontend_state)
+{
+ struct backend_info *be = dev_get_drvdata(&dev->dev);
+
+	pr_debug("frontend state %s\n", xenbus_strstate(frontend_state));
+
+ be->frontend_state = frontend_state;
+
+ switch (frontend_state) {
+ case XenbusStateInitialising:
+ if (dev->state == XenbusStateClosed) {
+ printk(KERN_INFO "%s: %s: prepare for reconnect\n",
+ __func__, dev->nodename);
+ xenbus_switch_state(dev, XenbusStateInitWait);
+ }
+ break;
+
+ case XenbusStateInitialised:
+ break;
+
+ case XenbusStateConnected:
+ if (dev->state == XenbusStateConnected)
+ break;
+ backend_create_netif(be);
+ if (be->netif)
+ connect(be);
+ break;
+
+ case XenbusStateClosing:
+ if (be->netif)
+ kobject_uevent(&dev->dev.kobj, KOBJ_OFFLINE);
+ disconnect_backend(dev);
+ xenbus_switch_state(dev, XenbusStateClosing);
+ break;
+
+ case XenbusStateClosed:
+ xenbus_switch_state(dev, XenbusStateClosed);
+ if (xenbus_dev_is_online(dev))
+ break;
+ /* fall through if not online */
+ case XenbusStateUnknown:
+ device_unregister(&dev->dev);
+ break;
+
+ default:
+ xenbus_dev_fatal(dev, -EINVAL, "saw state %d at frontend",
+ frontend_state);
+ break;
+ }
+}
+
+
+static void xen_net_read_rate(struct xenbus_device *dev,
+ unsigned long *bytes, unsigned long *usec)
+{
+ char *s, *e;
+ unsigned long b, u;
+ char *ratestr;
+
+ /* Default to unlimited bandwidth. */
+ *bytes = ~0UL;
+ *usec = 0;
+
+ ratestr = xenbus_read(XBT_NIL, dev->nodename, "rate", NULL);
+ if (IS_ERR(ratestr))
+ return;
+
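+	/*
+	 * The rate node has the form "<bytes>,<usec>": allow 'bytes' of
+	 * traffic every 'usec' microseconds.
+	 */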
+ s = ratestr;
+ b = simple_strtoul(s, &e, 10);
+ if ((s == e) || (*e != ','))
+ goto fail;
+
+ s = e + 1;
+ u = simple_strtoul(s, &e, 10);
+ if ((s == e) || (*e != '\0'))
+ goto fail;
+
+ *bytes = b;
+ *usec = u;
+
+ kfree(ratestr);
+ return;
+
+ fail:
+ pr_warn("Failed to parse network rate limit. Traffic unlimited.\n");
+ kfree(ratestr);
+}
+
+static int xen_net_read_mac(struct xenbus_device *dev, u8 mac[])
+{
+ char *s, *e, *macstr;
+ int i;
+
+ macstr = s = xenbus_read(XBT_NIL, dev->nodename, "mac", NULL);
+ if (IS_ERR(macstr))
+ return PTR_ERR(macstr);
+
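+	/* The mac node holds ETH_ALEN colon-separated hexadecimal octets. */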
+ for (i = 0; i < ETH_ALEN; i++) {
+ mac[i] = simple_strtoul(s, &e, 16);
+ if ((s == e) || (*e != ((i == ETH_ALEN-1) ? '\0' : ':'))) {
+ kfree(macstr);
+ return -ENOENT;
+ }
+ s = e+1;
+ }
+
+ kfree(macstr);
+ return 0;
+}
+
+static void unregister_hotplug_status_watch(struct backend_info *be)
+{
+ if (be->have_hotplug_status_watch) {
+ unregister_xenbus_watch(&be->hotplug_status_watch);
+ kfree(be->hotplug_status_watch.node);
+ }
+ be->have_hotplug_status_watch = 0;
+}
+
+static void hotplug_status_changed(struct xenbus_watch *watch,
+ const char **vec,
+ unsigned int vec_size)
+{
+ struct backend_info *be = container_of(watch,
+ struct backend_info,
+ hotplug_status_watch);
+ char *str;
+ unsigned int len;
+
+ str = xenbus_read(XBT_NIL, be->dev->nodename, "hotplug-status", &len);
+ if (IS_ERR(str))
+ return;
+ if (len == sizeof("connected")-1 && !memcmp(str, "connected", len)) {
+ xenbus_switch_state(be->dev, XenbusStateConnected);
+ /* Not interested in this watch anymore. */
+ unregister_hotplug_status_watch(be);
+ }
+ kfree(str);
+}
+
+static void connect(struct backend_info *be)
+{
+ int err;
+ struct xenbus_device *dev = be->dev;
+
+ err = connect_rings(be);
+ if (err)
+ return;
+
+ err = xen_net_read_mac(dev, be->netif->fe_dev_addr);
+ if (err) {
+ xenbus_dev_fatal(dev, err, "parsing %s/mac", dev->nodename);
+ return;
+ }
+
+ xen_net_read_rate(dev, &be->netif->credit_bytes,
+ &be->netif->credit_usec);
+ be->netif->remaining_credit = be->netif->credit_bytes;
+
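+	/*
+	 * Delay switching to Connected until the hotplug script reports
+	 * "connected" via the hotplug-status node; if the watch cannot be
+	 * registered, switch immediately instead.
+	 */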
+ unregister_hotplug_status_watch(be);
+ err = xenbus_watch_pathfmt(dev, &be->hotplug_status_watch,
+ hotplug_status_changed,
+ "%s/%s", dev->nodename, "hotplug-status");
+ if (err) {
+ /* Switch now, since we can't do a watch. */
+ xenbus_switch_state(dev, XenbusStateConnected);
+ } else {
+ be->have_hotplug_status_watch = 1;
+ }
+
+ netif_wake_queue(be->netif->dev);
+}
+
+
+static int connect_rings(struct backend_info *be)
+{
+ struct xen_netif *netif = be->netif;
+ struct xenbus_device *dev = be->dev;
+ unsigned long tx_ring_ref, rx_ring_ref;
+ unsigned int evtchn, rx_copy;
+ int err;
+ int val;
+
+ err = xenbus_gather(XBT_NIL, dev->otherend,
+ "tx-ring-ref", "%lu", &tx_ring_ref,
+ "rx-ring-ref", "%lu", &rx_ring_ref,
+ "event-channel", "%u", &evtchn, NULL);
+ if (err) {
+ xenbus_dev_fatal(dev, err,
+ "reading %s/ring-ref and event-channel",
+ dev->otherend);
+ return err;
+ }
+
+ err = xenbus_scanf(XBT_NIL, dev->otherend, "request-rx-copy", "%u",
+ &rx_copy);
+ if (err == -ENOENT) {
+ err = 0;
+ rx_copy = 0;
+ }
+ if (err < 0) {
+ xenbus_dev_fatal(dev, err, "reading %s/request-rx-copy",
+ dev->otherend);
+ return err;
+ }
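+	/* Only the copying receive path is implemented by this backend. */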
+ if (!rx_copy)
+ return -EOPNOTSUPP;
+
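+	/*
+	 * If the frontend requests rx notifications, the device queue can
+	 * be stopped until receive buffers become available; otherwise fall
+	 * back to a minimal queue length.
+	 */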
+ if (netif->dev->tx_queue_len != 0) {
+ if (xenbus_scanf(XBT_NIL, dev->otherend,
+ "feature-rx-notify", "%d", &val) < 0)
+ val = 0;
+ if (val)
+ netif->can_queue = 1;
+ else
+ /* Must be non-zero for pfifo_fast to work. */
+ netif->dev->tx_queue_len = 1;
+ }
+
+ if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-sg",
+ "%d", &val) < 0)
+ val = 0;
+ netif->can_sg = !!val;
+
+ if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-gso-tcpv4",
+ "%d", &val) < 0)
+ val = 0;
+ netif->gso = !!val;
+
+ if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-gso-tcpv4-prefix",
+ "%d", &val) < 0)
+ val = 0;
+ netif->gso_prefix = !!val;
+
+ if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-no-csum-offload",
+ "%d", &val) < 0)
+ val = 0;
+ netif->csum = !val;
+
+ /* Set dev->features */
+ netif_set_features(netif);
+
+ /* Map the shared frame, irq etc. */
+ err = netif_map(netif, tx_ring_ref, rx_ring_ref, evtchn);
+ if (err) {
+ xenbus_dev_fatal(dev, err,
+ "mapping shared-frames %lu/%lu port %u",
+ tx_ring_ref, rx_ring_ref, evtchn);
+ return err;
+ }
+ return 0;
+}
+
+
+/* ** Driver Registration ** */
+
+
+static const struct xenbus_device_id netback_ids[] = {
+ { "vif" },
+ { "" }
+};
+
+
+static struct xenbus_driver netback = {
+ .name = "vif",
+ .owner = THIS_MODULE,
+ .ids = netback_ids,
+ .probe = netback_probe,
+ .remove = netback_remove,
+ .uevent = netback_uevent,
+ .otherend_changed = frontend_changed,
+};
+
+
+int netif_xenbus_init(void)
+{
+	printk(KERN_INFO "registering netback\n");
+ return xenbus_register_backend(&netback);
+}
diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig
index 30290a8..5a48ce9 100644
--- a/drivers/xen/Kconfig
+++ b/drivers/xen/Kconfig
@@ -37,13 +37,6 @@ config XEN_BACKEND
depends on XEN_PCIDEV_BACKEND
-config XEN_NETDEV_BACKEND
- tristate "Xen backend network device"
- depends on XEN_BACKEND && NET
- help
- Implement the network backend driver, which passes packets
- from the guest domain's frontend drivers to the network.
-
config XENFS
tristate "Xen filesystem"
default y
diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile
index c0e0509..533a199 100644
--- a/drivers/xen/Makefile
+++ b/drivers/xen/Makefile
@@ -9,7 +9,6 @@ obj-$(CONFIG_HOTPLUG_CPU) += cpu_hotplug.o
obj-$(CONFIG_XEN_DEV_EVTCHN) += xen-evtchn.o
obj-$(CONFIG_XEN_GNTDEV) += xen-gntdev.o
obj-$(CONFIG_XEN_PCIDEV_BACKEND) += pciback/
-obj-$(CONFIG_XEN_NETDEV_BACKEND) += netback/
obj-$(CONFIG_XENFS) += xenfs/
obj-$(CONFIG_XEN_SYS_HYPERVISOR) += sys-hypervisor.o
obj-$(CONFIG_XEN_PLATFORM_PCI) += platform-pci.o
diff --git a/drivers/xen/netback/Makefile b/drivers/xen/netback/Makefile
deleted file mode 100644
index e346e81..0000000
--- a/drivers/xen/netback/Makefile
+++ /dev/null
@@ -1,3 +0,0 @@
-obj-$(CONFIG_XEN_NETDEV_BACKEND) := xen-netback.o
-
-xen-netback-y := netback.o xenbus.o interface.o
diff --git a/drivers/xen/netback/common.h b/drivers/xen/netback/common.h
deleted file mode 100644
index 49dc4cf..0000000
--- a/drivers/xen/netback/common.h
+++ /dev/null
@@ -1,273 +0,0 @@
-/*
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License version 2
- * as published by the Free Software Foundation; or, when distributed
- * separately from the Linux kernel or incorporated into other
- * software packages, subject to the following license:
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this source file (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use, copy, modify,
- * merge, publish, distribute, sublicense, and/or sell copies of the Software,
- * and to permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- */
-
-#ifndef __NETIF__BACKEND__COMMON_H__
-#define __NETIF__BACKEND__COMMON_H__
-
-#define pr_fmt(fmt) KBUILD_MODNAME ":%s: " fmt, __func__
-
-#include <linux/module.h>
-#include <linux/interrupt.h>
-#include <linux/slab.h>
-#include <linux/ip.h>
-#include <linux/in.h>
-#include <linux/io.h>
-#include <linux/netdevice.h>
-#include <linux/etherdevice.h>
-#include <linux/wait.h>
-#include <linux/sched.h>
-
-#include <xen/interface/io/netif.h>
-#include <asm/pgalloc.h>
-#include <xen/interface/grant_table.h>
-#include <xen/grant_table.h>
-#include <xen/xenbus.h>
-
-struct xen_netif {
- /* Unique identifier for this interface. */
- domid_t domid;
- int group;
- unsigned int handle;
-
- u8 fe_dev_addr[6];
-
- /* Physical parameters of the comms window. */
- grant_handle_t tx_shmem_handle;
- grant_ref_t tx_shmem_ref;
- grant_handle_t rx_shmem_handle;
- grant_ref_t rx_shmem_ref;
- unsigned int irq;
-
- /* The shared rings and indexes. */
- struct xen_netif_tx_back_ring tx;
- struct xen_netif_rx_back_ring rx;
- struct vm_struct *tx_comms_area;
- struct vm_struct *rx_comms_area;
-
- /* Flags that must not be set in dev->features */
- int features_disabled;
-
- /* Frontend feature information. */
- u8 can_sg:1;
- u8 gso:1;
- u8 gso_prefix:1;
- u8 csum:1;
-
- /* Internal feature information. */
- u8 can_queue:1; /* can queue packets for receiver? */
-
- /* Allow netif_be_start_xmit() to peek ahead in the rx request
- * ring. This is a prediction of what rx_req_cons will be once
- * all queued skbs are put on the ring. */
- RING_IDX rx_req_cons_peek;
-
- /* Transmit shaping: allow 'credit_bytes' every 'credit_usec'. */
- unsigned long credit_bytes;
- unsigned long credit_usec;
- unsigned long remaining_credit;
- struct timer_list credit_timeout;
-
- /* Statistics */
- int nr_copied_skbs;
- int rx_gso_checksum_fixup;
-
- /* Miscellaneous private stuff. */
- struct list_head list; /* scheduling list */
- atomic_t refcnt;
- struct net_device *dev;
- struct net_device_stats stats;
-
- unsigned int carrier;
-
- wait_queue_head_t waiting_to_free;
-};
-
-/*
- * Implement our own carrier flag: the network stack's version causes delays
- * when the carrier is re-enabled (in particular, dev_activate() may not
- * immediately be called, which can cause packet loss; also the etherbridge
- * can be rather lazy in activating its port).
- */
-#define netback_carrier_on(netif) ((netif)->carrier = 1)
-#define netback_carrier_off(netif) ((netif)->carrier = 0)
-#define netback_carrier_ok(netif) ((netif)->carrier)
-
-enum {
- NETBK_DONT_COPY_SKB,
- NETBK_DELAYED_COPY_SKB,
- NETBK_ALWAYS_COPY_SKB,
-};
-
-extern int netbk_copy_skb_mode;
-
-struct backend_info {
- struct xenbus_device *dev;
- struct xen_netif *netif;
- enum xenbus_state frontend_state;
- struct xenbus_watch hotplug_status_watch;
- int have_hotplug_status_watch:1;
-};
-
-#define NET_TX_RING_SIZE __RING_SIZE((struct xen_netif_tx_sring *)0, PAGE_SIZE)
-#define NET_RX_RING_SIZE __RING_SIZE((struct xen_netif_rx_sring *)0, PAGE_SIZE)
-
-void netif_disconnect(struct xen_netif *netif);
-
-void netif_set_features(struct xen_netif *netif);
-struct xen_netif *netif_alloc(struct device *parent, domid_t domid,
- unsigned int handle);
-int netif_map(struct xen_netif *netif, unsigned long tx_ring_ref,
- unsigned long rx_ring_ref, unsigned int evtchn);
-
-static inline void netif_get(struct xen_netif *netif)
-{
- atomic_inc(&netif->refcnt);
-}
-
-static inline void netif_put(struct xen_netif *netif)
-{
- if (atomic_dec_and_test(&netif->refcnt))
- wake_up(&netif->waiting_to_free);
-}
-
-int netif_xenbus_init(void);
-
-#define netif_schedulable(netif) \
- (netif_running((netif)->dev) && netback_carrier_ok(netif))
-
-void netif_schedule_work(struct xen_netif *netif);
-void netif_deschedule_work(struct xen_netif *netif);
-
-int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev);
-struct net_device_stats *netif_be_get_stats(struct net_device *dev);
-irqreturn_t netif_be_int(int irq, void *dev_id);
-
-static inline int netbk_can_queue(struct net_device *dev)
-{
- struct xen_netif *netif = netdev_priv(dev);
- return netif->can_queue;
-}
-
-static inline int netbk_can_sg(struct net_device *dev)
-{
- struct xen_netif *netif = netdev_priv(dev);
- return netif->can_sg;
-}
-
-struct pending_tx_info {
- struct xen_netif_tx_request req;
- struct xen_netif *netif;
-};
-typedef unsigned int pending_ring_idx_t;
-
-struct netbk_rx_meta {
- int id;
- int size;
- int gso_size;
-};
-
-struct netbk_tx_pending_inuse {
- struct list_head list;
- unsigned long alloc_time;
-};
-
-#define MAX_PENDING_REQS 256
-
-#define MAX_BUFFER_OFFSET PAGE_SIZE
-
-/* extra field used in struct page */
-union page_ext {
- struct {
-#if BITS_PER_LONG < 64
-#define IDX_WIDTH 8
-#define GROUP_WIDTH (BITS_PER_LONG - IDX_WIDTH)
- unsigned int group:GROUP_WIDTH;
- unsigned int idx:IDX_WIDTH;
-#else
- unsigned int group, idx;
-#endif
- } e;
- void *mapping;
-};
-
-struct xen_netbk {
- union {
- struct {
- struct tasklet_struct net_tx_tasklet;
- struct tasklet_struct net_rx_tasklet;
- } tasklet;
-
- struct {
- wait_queue_head_t netbk_action_wq;
- struct task_struct *task;
- } kthread;
- };
-
- struct sk_buff_head rx_queue;
- struct sk_buff_head tx_queue;
-
- struct timer_list net_timer;
- struct timer_list netbk_tx_pending_timer;
-
- struct page **mmap_pages;
-
- pending_ring_idx_t pending_prod;
- pending_ring_idx_t pending_cons;
- pending_ring_idx_t dealloc_prod;
- pending_ring_idx_t dealloc_cons;
-
- struct list_head pending_inuse_head;
- struct list_head net_schedule_list;
-
- /* Protect the net_schedule_list in netif. */
- spinlock_t net_schedule_list_lock;
-
- atomic_t netfront_count;
-
- struct pending_tx_info pending_tx_info[MAX_PENDING_REQS];
- struct netbk_tx_pending_inuse pending_inuse[MAX_PENDING_REQS];
- struct gnttab_unmap_grant_ref tx_unmap_ops[MAX_PENDING_REQS];
- struct gnttab_map_grant_ref tx_map_ops[MAX_PENDING_REQS];
-
- grant_handle_t grant_tx_handle[MAX_PENDING_REQS];
- u16 pending_ring[MAX_PENDING_REQS];
- u16 dealloc_ring[MAX_PENDING_REQS];
-
- /*
- * Each head or fragment can be up to 4096 bytes. Given
- * MAX_BUFFER_OFFSET of 4096 the worst case is that each
- * head/fragment uses 2 copy operation.
- */
- struct gnttab_copy grant_copy_op[2*NET_RX_RING_SIZE];
- unsigned char rx_notify[NR_IRQS];
- u16 notify_list[NET_RX_RING_SIZE];
- struct netbk_rx_meta meta[2*NET_RX_RING_SIZE];
-};
-
-extern struct xen_netbk *xen_netbk;
-extern int xen_netbk_group_nr;
-
-#endif /* __NETIF__BACKEND__COMMON_H__ */
diff --git a/drivers/xen/netback/interface.c b/drivers/xen/netback/interface.c
deleted file mode 100644
index b429f8c..0000000
--- a/drivers/xen/netback/interface.c
+++ /dev/null
@@ -1,470 +0,0 @@
-/*
- * Network-device interface management.
- *
- * Copyright (c) 2004-2005, Keir Fraser
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License version 2
- * as published by the Free Software Foundation; or, when distributed
- * separately from the Linux kernel or incorporated into other
- * software packages, subject to the following license:
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this source file (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use, copy, modify,
- * merge, publish, distribute, sublicense, and/or sell copies of the Software,
- * and to permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- */
-
-#include "common.h"
-
-#include <linux/ethtool.h>
-#include <linux/rtnetlink.h>
-
-#include <xen/events.h>
-#include <asm/xen/hypercall.h>
-
-/*
- * Module parameter 'queue_length':
- *
- * Enables queuing in the network stack when a client has run out of receive
- * descriptors.
- */
-static unsigned long netbk_queue_length = 32;
-module_param_named(queue_length, netbk_queue_length, ulong, 0644);
-
-static void netbk_add_netif(struct xen_netbk *netbk, int group_nr,
- struct xen_netif *netif)
-{
- int i;
- int min_netfront_count;
- int min_group = 0;
- min_netfront_count = atomic_read(&netbk[0].netfront_count);
- for (i = 0; i < group_nr; i++) {
- int netfront_count = atomic_read(&netbk[i].netfront_count);
- if (netfront_count < min_netfront_count) {
- min_group = i;
- min_netfront_count = netfront_count;
- }
- }
-
- netif->group = min_group;
- atomic_inc(&netbk[netif->group].netfront_count);
-}
-
-static void netbk_remove_netif(struct xen_netbk *netbk, struct xen_netif *netif)
-{
- atomic_dec(&netbk[netif->group].netfront_count);
-}
-
-static void __netif_up(struct xen_netif *netif)
-{
- netbk_add_netif(xen_netbk, xen_netbk_group_nr, netif);
- enable_irq(netif->irq);
- netif_schedule_work(netif);
-}
-
-static void __netif_down(struct xen_netif *netif)
-{
- disable_irq(netif->irq);
- netif_deschedule_work(netif);
- netbk_remove_netif(xen_netbk, netif);
-}
-
-static int net_open(struct net_device *dev)
-{
- struct xen_netif *netif = netdev_priv(dev);
- if (netback_carrier_ok(netif)) {
- __netif_up(netif);
- netif_start_queue(dev);
- }
- return 0;
-}
-
-static int net_close(struct net_device *dev)
-{
- struct xen_netif *netif = netdev_priv(dev);
- if (netback_carrier_ok(netif))
- __netif_down(netif);
- netif_stop_queue(dev);
- return 0;
-}
-
-static int netbk_change_mtu(struct net_device *dev, int mtu)
-{
- int max = netbk_can_sg(dev) ? 65535 - ETH_HLEN : ETH_DATA_LEN;
-
- if (mtu > max)
- return -EINVAL;
- dev->mtu = mtu;
- return 0;
-}
-
-void netif_set_features(struct xen_netif *netif)
-{
- struct net_device *dev = netif->dev;
- int features = dev->features;
-
- if (netif->can_sg)
- features |= NETIF_F_SG;
- if (netif->gso || netif->gso_prefix)
- features |= NETIF_F_TSO;
- if (netif->csum)
- features |= NETIF_F_IP_CSUM;
-
- features &= ~(netif->features_disabled);
-
- if (!(features & NETIF_F_SG) && dev->mtu > ETH_DATA_LEN)
- dev->mtu = ETH_DATA_LEN;
-
- dev->features = features;
-}
-
-static int netbk_set_tx_csum(struct net_device *dev, u32 data)
-{
- struct xen_netif *netif = netdev_priv(dev);
- if (data) {
- if (!netif->csum)
- return -ENOSYS;
- netif->features_disabled &= ~NETIF_F_IP_CSUM;
- } else {
- netif->features_disabled |= NETIF_F_IP_CSUM;
- }
-
- netif_set_features(netif);
- return 0;
-}
-
-static int netbk_set_sg(struct net_device *dev, u32 data)
-{
- struct xen_netif *netif = netdev_priv(dev);
- if (data) {
- if (!netif->can_sg)
- return -ENOSYS;
- netif->features_disabled &= ~NETIF_F_SG;
- } else {
- netif->features_disabled |= NETIF_F_SG;
- }
-
- netif_set_features(netif);
- return 0;
-}
-
-static int netbk_set_tso(struct net_device *dev, u32 data)
-{
- struct xen_netif *netif = netdev_priv(dev);
- if (data) {
- if (!netif->gso && !netif->gso_prefix)
- return -ENOSYS;
- netif->features_disabled &= ~NETIF_F_TSO;
- } else {
- netif->features_disabled |= NETIF_F_TSO;
- }
-
- netif_set_features(netif);
- return 0;
-}
-
-static void netbk_get_drvinfo(struct net_device *dev,
- struct ethtool_drvinfo *info)
-{
- strcpy(info->driver, "netbk");
- strcpy(info->bus_info, dev_name(dev->dev.parent));
-}
-
-static const struct netif_stat {
- char name[ETH_GSTRING_LEN];
- u16 offset;
-} netbk_stats[] = {
- {
- "copied_skbs",
- offsetof(struct xen_netif, nr_copied_skbs)
- },
- {
- "rx_gso_checksum_fixup",
- offsetof(struct xen_netif, rx_gso_checksum_fixup)
- },
-};
-
-static int netbk_get_sset_count(struct net_device *dev, int string_set)
-{
- switch (string_set) {
- case ETH_SS_STATS:
- return ARRAY_SIZE(netbk_stats);
- default:
- return -EINVAL;
- }
-}
-
-static void netbk_get_ethtool_stats(struct net_device *dev,
- struct ethtool_stats *stats, u64 * data)
-{
- void *netif = netdev_priv(dev);
- int i;
-
- for (i = 0; i < ARRAY_SIZE(netbk_stats); i++)
- data[i] = *(int *)(netif + netbk_stats[i].offset);
-}
-
-static void netbk_get_strings(struct net_device *dev, u32 stringset, u8 * data)
-{
- int i;
-
- switch (stringset) {
- case ETH_SS_STATS:
- for (i = 0; i < ARRAY_SIZE(netbk_stats); i++)
- memcpy(data + i * ETH_GSTRING_LEN,
- netbk_stats[i].name, ETH_GSTRING_LEN);
- break;
- }
-}
-
-static struct ethtool_ops network_ethtool_ops = {
- .get_drvinfo = netbk_get_drvinfo,
-
- .get_tx_csum = ethtool_op_get_tx_csum,
- .set_tx_csum = netbk_set_tx_csum,
- .get_sg = ethtool_op_get_sg,
- .set_sg = netbk_set_sg,
- .get_tso = ethtool_op_get_tso,
- .set_tso = netbk_set_tso,
- .get_link = ethtool_op_get_link,
-
- .get_sset_count = netbk_get_sset_count,
- .get_ethtool_stats = netbk_get_ethtool_stats,
- .get_strings = netbk_get_strings,
-};
-
-static struct net_device_ops netback_ops = {
- .ndo_start_xmit = netif_be_start_xmit,
- .ndo_get_stats = netif_be_get_stats,
- .ndo_open = net_open,
- .ndo_stop = net_close,
- .ndo_change_mtu = netbk_change_mtu,
-};
-
-struct xen_netif *netif_alloc(struct device *parent, domid_t domid,
- unsigned int handle)
-{
- int err = 0;
- struct net_device *dev;
- struct xen_netif *netif;
- char name[IFNAMSIZ] = {};
-
- snprintf(name, IFNAMSIZ - 1, "vif%u.%u", domid, handle);
- dev = alloc_netdev(sizeof(struct xen_netif), name, ether_setup);
- if (dev == NULL) {
- pr_debug("Could not allocate netdev\n");
- return ERR_PTR(-ENOMEM);
- }
-
- SET_NETDEV_DEV(dev, parent);
-
- netif = netdev_priv(dev);
- memset(netif, 0, sizeof(*netif));
- netif->domid = domid;
- netif->group = -1;
- netif->handle = handle;
- netif->can_sg = 1;
- netif->csum = 1;
- atomic_set(&netif->refcnt, 1);
- init_waitqueue_head(&netif->waiting_to_free);
- netif->dev = dev;
- INIT_LIST_HEAD(&netif->list);
-
- netback_carrier_off(netif);
-
- netif->credit_bytes = netif->remaining_credit = ~0UL;
- netif->credit_usec = 0UL;
- init_timer(&netif->credit_timeout);
- /* Initialize 'expires' now: it's used to track the credit window. */
- netif->credit_timeout.expires = jiffies;
-
- dev->netdev_ops = &netback_ops;
- netif_set_features(netif);
- SET_ETHTOOL_OPS(dev, &network_ethtool_ops);
-
- dev->tx_queue_len = netbk_queue_length;
-
- /*
- * Initialise a dummy MAC address. We choose the numerically
- * largest non-broadcast address to prevent the address getting
- * stolen by an Ethernet bridge for STP purposes.
- * (FE:FF:FF:FF:FF:FF)
- */
- memset(dev->dev_addr, 0xFF, ETH_ALEN);
- dev->dev_addr[0] &= ~0x01;
-
- rtnl_lock();
- err = register_netdevice(dev);
- rtnl_unlock();
- if (err) {
- pr_debug("Could not register new net device %s: err=%d\n",
- dev->name, err);
- free_netdev(dev);
- return ERR_PTR(err);
- }
-
- pr_debug("Successfully created netif\n");
- return netif;
-}
-
-static int map_frontend_pages(struct xen_netif *netif,
- grant_ref_t tx_ring_ref,
- grant_ref_t rx_ring_ref)
-{
- struct gnttab_map_grant_ref op;
-
- gnttab_set_map_op(&op, (unsigned long)netif->tx_comms_area->addr,
- GNTMAP_host_map, tx_ring_ref, netif->domid);
-
- if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1))
- BUG();
-
- if (op.status) {
- pr_debug("Gnttab failure mapping tx_ring_ref!\n");
- return op.status;
- }
-
- netif->tx_shmem_ref = tx_ring_ref;
- netif->tx_shmem_handle = op.handle;
-
- gnttab_set_map_op(&op, (unsigned long)netif->rx_comms_area->addr,
- GNTMAP_host_map, rx_ring_ref, netif->domid);
-
- if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1))
- BUG();
-
- if (op.status) {
- struct gnttab_unmap_grant_ref unop;
-
- gnttab_set_unmap_op(&unop,
- (unsigned long)netif->tx_comms_area->addr,
- GNTMAP_host_map, netif->tx_shmem_handle);
- HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &unop, 1);
- pr_debug("Gnttab failure mapping rx_ring_ref!\n");
- return op.status;
- }
-
- netif->rx_shmem_ref = rx_ring_ref;
- netif->rx_shmem_handle = op.handle;
-
- return 0;
-}
-
-static void unmap_frontend_pages(struct xen_netif *netif)
-{
- struct gnttab_unmap_grant_ref op;
-
- gnttab_set_unmap_op(&op, (unsigned long)netif->tx_comms_area->addr,
- GNTMAP_host_map, netif->tx_shmem_handle);
-
- if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1))
- BUG();
-
- gnttab_set_unmap_op(&op, (unsigned long)netif->rx_comms_area->addr,
- GNTMAP_host_map, netif->rx_shmem_handle);
-
- if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1))
- BUG();
-}
-
-int netif_map(struct xen_netif *netif, unsigned long tx_ring_ref,
- unsigned long rx_ring_ref, unsigned int evtchn)
-{
- int err = -ENOMEM;
- struct xen_netif_tx_sring *txs;
- struct xen_netif_rx_sring *rxs;
-
- /* Already connected through? */
- if (netif->irq)
- return 0;
-
- netif->tx_comms_area = alloc_vm_area(PAGE_SIZE);
- if (netif->tx_comms_area == NULL)
- return -ENOMEM;
- netif->rx_comms_area = alloc_vm_area(PAGE_SIZE);
- if (netif->rx_comms_area == NULL)
- goto err_rx;
-
- err = map_frontend_pages(netif, tx_ring_ref, rx_ring_ref);
- if (err)
- goto err_map;
-
- err = bind_interdomain_evtchn_to_irqhandler(
- netif->domid, evtchn, netif_be_int, 0,
- netif->dev->name, netif);
- if (err < 0)
- goto err_hypervisor;
- netif->irq = err;
- disable_irq(netif->irq);
-
- txs = (struct xen_netif_tx_sring *)netif->tx_comms_area->addr;
- BACK_RING_INIT(&netif->tx, txs, PAGE_SIZE);
-
- rxs = (struct xen_netif_rx_sring *)
- ((char *)netif->rx_comms_area->addr);
- BACK_RING_INIT(&netif->rx, rxs, PAGE_SIZE);
-
- netif->rx_req_cons_peek = 0;
-
- netif_get(netif);
-
- rtnl_lock();
- netback_carrier_on(netif);
- if (netif_running(netif->dev))
- __netif_up(netif);
- rtnl_unlock();
-
- return 0;
-err_hypervisor:
- unmap_frontend_pages(netif);
-err_map:
- free_vm_area(netif->rx_comms_area);
-err_rx:
- free_vm_area(netif->tx_comms_area);
- return err;
-}
-
-void netif_disconnect(struct xen_netif *netif)
-{
- if (netback_carrier_ok(netif)) {
- rtnl_lock();
- netback_carrier_off(netif);
- netif_carrier_off(netif->dev); /* discard queued packets */
- if (netif_running(netif->dev))
- __netif_down(netif);
- rtnl_unlock();
- netif_put(netif);
- }
-
- atomic_dec(&netif->refcnt);
- wait_event(netif->waiting_to_free, atomic_read(&netif->refcnt) == 0);
-
- del_timer_sync(&netif->credit_timeout);
-
- if (netif->irq)
- unbind_from_irqhandler(netif->irq, netif);
-
- unregister_netdev(netif->dev);
-
- if (netif->tx.sring) {
- unmap_frontend_pages(netif);
- free_vm_area(netif->tx_comms_area);
- free_vm_area(netif->rx_comms_area);
- }
-
- free_netdev(netif->dev);
-}
diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
deleted file mode 100644
index b290525..0000000
--- a/drivers/xen/netback/netback.c
+++ /dev/null
@@ -1,1934 +0,0 @@
-/*
- * Back-end of the driver for virtual network devices. This portion of the
- * driver exports a 'unified' network-device interface that can be accessed
- * by any operating system that implements a compatible front end. A
- * reference front-end implementation can be found in:
- * drivers/net/xen-netfront.c
- *
- * Copyright (c) 2002-2005, K A Fraser
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License version 2
- * as published by the Free Software Foundation; or, when distributed
- * separately from the Linux kernel or incorporated into other
- * software packages, subject to the following license:
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this source file (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use, copy, modify,
- * merge, publish, distribute, sublicense, and/or sell copies of the Software,
- * and to permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- */
-
-#include "common.h"
-
-#include <linux/kthread.h>
-#include <linux/if_vlan.h>
-#include <linux/udp.h>
-
-#include <net/tcp.h>
-
-#include <xen/balloon.h>
-#include <xen/events.h>
-#include <xen/interface/memory.h>
-
-#include <asm/xen/hypercall.h>
-#include <asm/xen/page.h>
-
-/*define NETBE_DEBUG_INTERRUPT*/
-
-struct xen_netbk *xen_netbk;
-int xen_netbk_group_nr;
-
-static void netif_idx_release(struct xen_netbk *netbk, u16 pending_idx);
-static void make_tx_response(struct xen_netif *netif,
- struct xen_netif_tx_request *txp,
- s8 st);
-static struct xen_netif_rx_response *make_rx_response(struct xen_netif *netif,
- u16 id,
- s8 st,
- u16 offset,
- u16 size,
- u16 flags);
-
-static void net_tx_action(unsigned long data);
-
-static void net_rx_action(unsigned long data);
-
-static inline unsigned long idx_to_pfn(struct xen_netbk *netbk,
- unsigned int idx)
-{
- return page_to_pfn(netbk->mmap_pages[idx]);
-}
-
-static inline unsigned long idx_to_kaddr(struct xen_netbk *netbk,
- unsigned int idx)
-{
- return (unsigned long)pfn_to_kaddr(idx_to_pfn(netbk, idx));
-}
-
-/* extra field used in struct page */
-static inline void netif_set_page_ext(struct page *pg,
- unsigned int group, unsigned int idx)
-{
- union page_ext ext = { .e = { .group = group + 1, .idx = idx } };
-
- BUILD_BUG_ON(sizeof(ext) > sizeof(ext.mapping));
- pg->mapping = ext.mapping;
-}
-
-static int netif_get_page_ext(struct page *pg,
- unsigned int *_group, unsigned int *_idx)
-{
- union page_ext ext = { .mapping = pg->mapping };
- struct xen_netbk *netbk;
- unsigned int group, idx;
-
- if (!PageForeign(pg))
- return 0;
-
- group = ext.e.group - 1;
-
- if (group < 0 || group >= xen_netbk_group_nr)
- return 0;
-
- netbk = &xen_netbk[group];
-
- if (netbk->mmap_pages == NULL)
- return 0;
-
- idx = ext.e.idx;
-
- if ((idx < 0) || (idx >= MAX_PENDING_REQS))
- return 0;
-
- if (netbk->mmap_pages[idx] != pg)
- return 0;
-
- *_group = group;
- *_idx = idx;
-
- return 1;
-}
-
-/*
- * This is the amount of packet we copy rather than map, so that the
- * guest can't fiddle with the contents of the headers while we do
- * packet processing on them (netfilter, routing, etc).
- */
-#define PKT_PROT_LEN (ETH_HLEN + \
- VLAN_HLEN + \
- sizeof(struct iphdr) + MAX_IPOPTLEN + \
- sizeof(struct tcphdr) + MAX_TCP_OPTION_SPACE)
-
-static inline pending_ring_idx_t pending_index(unsigned i)
-{
- return i & (MAX_PENDING_REQS-1);
-}
-
-static inline pending_ring_idx_t nr_pending_reqs(struct xen_netbk *netbk)
-{
- return MAX_PENDING_REQS -
- netbk->pending_prod + netbk->pending_cons;
-}
-
-/* Setting this allows the safe use of this driver without netloop. */
-static int MODPARM_copy_skb = 1;
-module_param_named(copy_skb, MODPARM_copy_skb, bool, 0);
-MODULE_PARM_DESC(copy_skb, "Copy data received from netfront without netloop");
-
-int netbk_copy_skb_mode;
-
-static int MODPARM_netback_kthread;
-module_param_named(netback_kthread, MODPARM_netback_kthread, bool, 0);
-MODULE_PARM_DESC(netback_kthread, "Use kernel thread to replace tasklet");
-
-/*
- * Netback bottom half handler.
- * dir indicates the data direction.
- * rx: 1, tx: 0.
- */
-static inline void xen_netbk_bh_handler(struct xen_netbk *netbk, int dir)
-{
- if (MODPARM_netback_kthread)
- wake_up(&netbk->kthread.netbk_action_wq);
- else if (dir)
- tasklet_schedule(&netbk->tasklet.net_rx_tasklet);
- else
- tasklet_schedule(&netbk->tasklet.net_tx_tasklet);
-}
-
-static inline void maybe_schedule_tx_action(struct xen_netbk *netbk)
-{
- smp_mb();
- if ((nr_pending_reqs(netbk) < (MAX_PENDING_REQS/2)) &&
- !list_empty(&netbk->net_schedule_list))
- xen_netbk_bh_handler(netbk, 0);
-}
-
-static struct sk_buff *netbk_copy_skb(struct sk_buff *skb)
-{
- struct skb_shared_info *ninfo;
- struct sk_buff *nskb;
- unsigned long offset;
- int ret;
- int len;
- int headlen;
-
- BUG_ON(skb_shinfo(skb)->frag_list != NULL);
-
- nskb = alloc_skb(SKB_MAX_HEAD(0), GFP_ATOMIC | __GFP_NOWARN);
- if (unlikely(!nskb))
- goto err;
-
- skb_reserve(nskb, NET_SKB_PAD + NET_IP_ALIGN);
- headlen = skb_end_pointer(nskb) - nskb->data;
- if (headlen > skb_headlen(skb))
- headlen = skb_headlen(skb);
- ret = skb_copy_bits(skb, 0, __skb_put(nskb, headlen), headlen);
- BUG_ON(ret);
-
- ninfo = skb_shinfo(nskb);
- ninfo->gso_size = skb_shinfo(skb)->gso_size;
- ninfo->gso_type = skb_shinfo(skb)->gso_type;
-
- offset = headlen;
- len = skb->len - headlen;
-
- nskb->len = skb->len;
- nskb->data_len = len;
- nskb->truesize += len;
-
- while (len) {
- struct page *page;
- int copy;
- int zero;
-
- if (unlikely(ninfo->nr_frags >= MAX_SKB_FRAGS)) {
- dump_stack();
- goto err_free;
- }
-
- copy = len >= PAGE_SIZE ? PAGE_SIZE : len;
- zero = len >= PAGE_SIZE ? 0 : __GFP_ZERO;
-
- page = alloc_page(GFP_ATOMIC | __GFP_NOWARN | zero);
- if (unlikely(!page))
- goto err_free;
-
- ret = skb_copy_bits(skb, offset, page_address(page), copy);
- BUG_ON(ret);
-
- ninfo->frags[ninfo->nr_frags].page = page;
- ninfo->frags[ninfo->nr_frags].page_offset = 0;
- ninfo->frags[ninfo->nr_frags].size = copy;
- ninfo->nr_frags++;
-
- offset += copy;
- len -= copy;
- }
-
-#ifdef NET_SKBUFF_DATA_USES_OFFSET
- offset = 0;
-#else
- offset = nskb->data - skb->data;
-#endif
-
- nskb->transport_header = skb->transport_header + offset;
- nskb->network_header = skb->network_header + offset;
- nskb->mac_header = skb->mac_header + offset;
-
- return nskb;
-
- err_free:
- kfree_skb(nskb);
- err:
- return NULL;
-}
-
-static inline int netbk_max_required_rx_slots(struct xen_netif *netif)
-{
- if (netif->can_sg || netif->gso || netif->gso_prefix)
- return MAX_SKB_FRAGS + 2; /* header + extra_info + frags */
- return 1; /* all in one */
-}
-
-static inline int netbk_queue_full(struct xen_netif *netif)
-{
- RING_IDX peek = netif->rx_req_cons_peek;
- RING_IDX needed = netbk_max_required_rx_slots(netif);
-
- return ((netif->rx.sring->req_prod - peek) < needed) ||
- ((netif->rx.rsp_prod_pvt + NET_RX_RING_SIZE - peek) < needed);
-}
-
-/*
- * Returns true if we should start a new receive buffer instead of
- * adding 'size' bytes to a buffer which currently contains 'offset'
- * bytes.
- */
-static bool start_new_rx_buffer(int offset, unsigned long size, int head)
-{
- /* simple case: we have completely filled the current buffer. */
- if (offset == MAX_BUFFER_OFFSET)
- return true;
-
- /*
- * complex case: start a fresh buffer if the current frag
- * would overflow the current buffer but only if:
- * (i) this frag would fit completely in the next buffer
- * and (ii) there is already some data in the current buffer
- * and (iii) this is not the head buffer.
- *
- * Where:
- * - (i) stops us splitting a frag into two copies
- * unless the frag is too large for a single buffer.
- * - (ii) stops us from leaving a buffer pointlessly empty.
- * - (iii) stops us leaving the first buffer
- * empty. Strictly speaking this is already covered
- * by (ii) but is explicitly checked because
- * netfront relies on the first buffer being
- * non-empty and can crash otherwise.
- *
- * This means we will effectively linearise small
- * frags but do not needlessly split large buffers
- * into multiple copies tend to give large frags their
- * own buffers as before.
- */
- if ((offset + size > MAX_BUFFER_OFFSET) &&
- (size <= MAX_BUFFER_OFFSET) && offset && !head)
- return true;
-
- return false;
-}
-
-/*
- * Figure out how many ring slots we're going to need to send @skb to
- * the guest. This function is essentially a dry run of
- * netbk_gop_frag_copy.
- */
-static unsigned int count_skb_slots(struct sk_buff *skb, struct xen_netif *netif)
-{
- unsigned int count = 1;
- int i, copy_off = 0;
-
- BUG_ON(offset_in_page(skb->data)+skb_headlen(skb) > MAX_BUFFER_OFFSET);
-
- copy_off = skb_headlen(skb);
-
- if (skb_shinfo(skb)->gso_size)
- count++;
-
- for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
- unsigned long size = skb_shinfo(skb)->frags[i].size;
- unsigned long bytes;
- while (size > 0) {
- BUG_ON(copy_off > MAX_BUFFER_OFFSET);
-
- if (start_new_rx_buffer(copy_off, size, 0)) {
- count++;
- copy_off = 0;
- }
-
- bytes = size;
- if (copy_off + bytes > MAX_BUFFER_OFFSET)
- bytes = MAX_BUFFER_OFFSET - copy_off;
-
- copy_off += bytes;
- size -= bytes;
- }
- }
- return count;
-}
-
-int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev)
-{
- struct xen_netif *netif = netdev_priv(dev);
- struct xen_netbk *netbk;
-
- BUG_ON(skb->dev != dev);
-
- if (netif->group == -1)
- goto drop;
-
- netbk = &xen_netbk[netif->group];
-
- /* Drop the packet if the target domain has no receive buffers. */
- if (unlikely(!netif_schedulable(netif) || netbk_queue_full(netif)))
- goto drop;
-
- /*
- * XXX For now we also copy skbuffs whose head crosses a page
- * boundary, because netbk_gop_skb can't handle them.
- */
- if ((skb_headlen(skb) + offset_in_page(skb->data)) >= PAGE_SIZE) {
- struct sk_buff *nskb = netbk_copy_skb(skb);
- if (unlikely(nskb == NULL))
- goto drop;
- /* Copy only the header fields we use in this driver. */
- nskb->dev = skb->dev;
- nskb->ip_summed = skb->ip_summed;
- dev_kfree_skb(skb);
- skb = nskb;
- }
-
- /* Reserve ring slots for the worst-case number of fragments. */
- netif->rx_req_cons_peek += count_skb_slots(skb, netif);
- netif_get(netif);
-
- if (netbk_can_queue(dev) && netbk_queue_full(netif)) {
- netif->rx.sring->req_event = netif->rx_req_cons_peek +
- netbk_max_required_rx_slots(netif);
- mb(); /* request notification /then/ check & stop the queue */
- if (netbk_queue_full(netif))
- netif_stop_queue(dev);
- }
- skb_queue_tail(&netbk->rx_queue, skb);
-
- xen_netbk_bh_handler(netbk, 1);
-
- return 0;
-
- drop:
- netif->stats.tx_dropped++;
- dev_kfree_skb(skb);
- return 0;
-}
-
-struct netrx_pending_operations {
- unsigned copy_prod, copy_cons;
- unsigned meta_prod, meta_cons;
- struct gnttab_copy *copy;
- struct netbk_rx_meta *meta;
- int copy_off;
- grant_ref_t copy_gref;
-};
-
-static struct netbk_rx_meta *get_next_rx_buffer(struct xen_netif *netif,
- struct netrx_pending_operations *npo)
-{
- struct netbk_rx_meta *meta;
- struct xen_netif_rx_request *req;
-
- req = RING_GET_REQUEST(&netif->rx, netif->rx.req_cons++);
-
- meta = npo->meta + npo->meta_prod++;
- meta->gso_size = 0;
- meta->size = 0;
- meta->id = req->id;
-
- npo->copy_off = 0;
- npo->copy_gref = req->gref;
-
- return meta;
-}
-
-/*
- * Set up the grant operations for this fragment. If it's a flipping
- * interface, we also set up the unmap request from here.
- */
-static void netbk_gop_frag_copy(struct xen_netif *netif,
- struct netrx_pending_operations *npo,
- struct page *page, unsigned long size,
- unsigned long offset, int head)
-{
- struct gnttab_copy *copy_gop;
- struct netbk_rx_meta *meta;
- /*
- * These variables a used iff netif_get_page_ext returns true,
- * in which case they are guaranteed to be initialized.
- */
- unsigned int uninitialized_var(group), uninitialized_var(idx);
- int foreign = netif_get_page_ext(page, &group, &idx);
- unsigned long bytes;
-
- /* Data must not cross a page boundary. */
- BUG_ON(size + offset > PAGE_SIZE);
-
- meta = npo->meta + npo->meta_prod - 1;
-
- while (size > 0) {
- BUG_ON(npo->copy_off > MAX_BUFFER_OFFSET);
-
- if (start_new_rx_buffer(npo->copy_off, size, head)) {
- /*
- * Netfront requires there to be some data in the head
- * buffer.
- */
- BUG_ON(head);
-
- meta = get_next_rx_buffer(netif, npo);
- }
-
- bytes = size;
- if (npo->copy_off + bytes > MAX_BUFFER_OFFSET)
- bytes = MAX_BUFFER_OFFSET - npo->copy_off;
-
- copy_gop = npo->copy + npo->copy_prod++;
- copy_gop->flags = GNTCOPY_dest_gref;
- if (foreign) {
- struct xen_netbk *netbk = &xen_netbk[group];
- struct pending_tx_info *src_pend;
-
- src_pend = &netbk->pending_tx_info[idx];
-
- copy_gop->source.domid = src_pend->netif->domid;
- copy_gop->source.u.ref = src_pend->req.gref;
- copy_gop->flags |= GNTCOPY_source_gref;
- } else {
- void *vaddr = page_address(page);
- copy_gop->source.domid = DOMID_SELF;
- copy_gop->source.u.gmfn = virt_to_mfn(vaddr);
- }
- copy_gop->source.offset = offset;
- copy_gop->dest.domid = netif->domid;
-
- copy_gop->dest.offset = npo->copy_off;
- copy_gop->dest.u.ref = npo->copy_gref;
- copy_gop->len = bytes;
-
- npo->copy_off += bytes;
- meta->size += bytes;
-
- offset += bytes;
- size -= bytes;
- head = 0; /* There must be something in this buffer now. */
- }
-}
-
-/*
- * Prepare an SKB to be transmitted to the frontend.
- *
- * This function is responsible for allocating grant operations, meta
- * structures, etc.
- *
- * It returns the number of meta structures consumed. The number of
- * ring slots used is always equal to the number of meta slots used
- * plus the number of GSO descriptors used. Currently, we use either
- * zero GSO descriptors (for non-GSO packets) or one descriptor (for
- * frontend-side LRO).
- */
-static int netbk_gop_skb(struct sk_buff *skb,
- struct netrx_pending_operations *npo)
-{
- struct xen_netif *netif = netdev_priv(skb->dev);
- int nr_frags = skb_shinfo(skb)->nr_frags;
- int i;
- struct xen_netif_rx_request *req;
- struct netbk_rx_meta *meta;
- int old_meta_prod;
-
- old_meta_prod = npo->meta_prod;
-
- /* Set up a GSO prefix descriptor, if necessary */
- if (skb_shinfo(skb)->gso_size && netif->gso_prefix) {
- req = RING_GET_REQUEST(&netif->rx, netif->rx.req_cons++);
- meta = npo->meta + npo->meta_prod++;
- meta->gso_size = skb_shinfo(skb)->gso_size;
- meta->size = 0;
- meta->id = req->id;
- }
-
- req = RING_GET_REQUEST(&netif->rx, netif->rx.req_cons++);
- meta = npo->meta + npo->meta_prod++;
-
- if (!netif->gso_prefix)
- meta->gso_size = skb_shinfo(skb)->gso_size;
- else
- meta->gso_size = 0;
-
- meta->size = 0;
- meta->id = req->id;
- npo->copy_off = 0;
- npo->copy_gref = req->gref;
-
- netbk_gop_frag_copy(netif,
- npo, virt_to_page(skb->data),
- skb_headlen(skb),
- offset_in_page(skb->data), 1);
-
- /* Leave a gap for the GSO descriptor. */
- if (skb_shinfo(skb)->gso_size && !netif->gso_prefix)
- netif->rx.req_cons++;
-
- for (i = 0; i < nr_frags; i++) {
- netbk_gop_frag_copy(netif, npo,
- skb_shinfo(skb)->frags[i].page,
- skb_shinfo(skb)->frags[i].size,
- skb_shinfo(skb)->frags[i].page_offset,
- 0);
- }
-
- return npo->meta_prod - old_meta_prod;
-}
-
-/*
- * This is a twin to netbk_gop_skb. Assume that netbk_gop_skb was
- * used to set up the operations on the top of
- * netrx_pending_operations, which have since been done. Check that
- * they didn't give any errors and advance over them.
- */
-static int netbk_check_gop(int nr_meta_slots, domid_t domid,
- struct netrx_pending_operations *npo)
-{
- struct gnttab_copy *copy_op;
- int status = NETIF_RSP_OKAY;
- int i;
-
- for (i = 0; i < nr_meta_slots; i++) {
- copy_op = npo->copy + npo->copy_cons++;
- if (copy_op->status != GNTST_okay) {
- pr_debug("Bad status %d from copy to DOM%d.\n",
- copy_op->status, domid);
- status = NETIF_RSP_ERROR;
- }
- }
-
- return status;
-}
-
-static void netbk_add_frag_responses(struct xen_netif *netif, int status,
- struct netbk_rx_meta *meta,
- int nr_meta_slots)
-{
- int i;
- unsigned long offset;
-
- /* No fragments used */
- if (nr_meta_slots <= 1)
- return;
-
- nr_meta_slots--;
-
- for (i = 0; i < nr_meta_slots; i++) {
- int flags;
- if (i == nr_meta_slots - 1)
- flags = 0;
- else
- flags = NETRXF_more_data;
-
- offset = 0;
- make_rx_response(netif, meta[i].id, status, offset,
- meta[i].size, flags);
- }
-}
-
-struct skb_cb_overlay {
- int meta_slots_used;
-};
-
-static void net_rx_action(unsigned long data)
-{
- struct xen_netif *netif = NULL;
- struct xen_netbk *netbk = (struct xen_netbk *)data;
- s8 status;
- u16 irq, flags;
- struct xen_netif_rx_response *resp;
- struct sk_buff_head rxq;
- struct sk_buff *skb;
- int notify_nr = 0;
- int ret;
- int nr_frags;
- int count;
- unsigned long offset;
- struct skb_cb_overlay *sco;
-
- struct netrx_pending_operations npo = {
- .copy = netbk->grant_copy_op,
- .meta = netbk->meta,
- };
-
- skb_queue_head_init(&rxq);
-
- count = 0;
-
- while ((skb = skb_dequeue(&netbk->rx_queue)) != NULL) {
- netif = netdev_priv(skb->dev);
- nr_frags = skb_shinfo(skb)->nr_frags;
-
- sco = (struct skb_cb_overlay *)skb->cb;
- sco->meta_slots_used = netbk_gop_skb(skb, &npo);
-
- count += nr_frags + 1;
-
- __skb_queue_tail(&rxq, skb);
-
- /* Filled the batch queue? */
- if (count + MAX_SKB_FRAGS >= NET_RX_RING_SIZE)
- break;
- }
-
- BUG_ON(npo.meta_prod > ARRAY_SIZE(netbk->meta));
-
- if (!npo.copy_prod)
- return;
-
- BUG_ON(npo.copy_prod > ARRAY_SIZE(netbk->grant_copy_op));
- ret = HYPERVISOR_grant_table_op(GNTTABOP_copy, &netbk->grant_copy_op,
- npo.copy_prod);
- BUG_ON(ret != 0);
-
- while ((skb = __skb_dequeue(&rxq)) != NULL) {
- sco = (struct skb_cb_overlay *)skb->cb;
-
- netif = netdev_priv(skb->dev);
-
- if (netbk->meta[npo.meta_cons].gso_size && netif->gso_prefix) {
- resp = RING_GET_RESPONSE(&netif->rx,
- netif->rx.rsp_prod_pvt++);
-
- resp->flags = NETRXF_gso_prefix | NETRXF_more_data;
-
- resp->offset = netbk->meta[npo.meta_cons].gso_size;
- resp->id = netbk->meta[npo.meta_cons].id;
- resp->status = sco->meta_slots_used;
-
- npo.meta_cons++;
- sco->meta_slots_used--;
- }
-
-
- netif->stats.tx_bytes += skb->len;
- netif->stats.tx_packets++;
-
- status = netbk_check_gop(sco->meta_slots_used,
- netif->domid, &npo);
-
- if (sco->meta_slots_used == 1)
- flags = 0;
- else
- flags = NETRXF_more_data;
-
- if (skb->ip_summed == CHECKSUM_PARTIAL) /* local packet? */
- flags |= NETRXF_csum_blank | NETRXF_data_validated;
- else if (skb->ip_summed == CHECKSUM_UNNECESSARY)
- /* remote but checksummed. */
- flags |= NETRXF_data_validated;
-
- offset = 0;
- resp = make_rx_response(netif, netbk->meta[npo.meta_cons].id,
- status, offset,
- netbk->meta[npo.meta_cons].size,
- flags);
-
- if (netbk->meta[npo.meta_cons].gso_size && !netif->gso_prefix) {
- struct xen_netif_extra_info *gso =
- (struct xen_netif_extra_info *)
- RING_GET_RESPONSE(&netif->rx,
- netif->rx.rsp_prod_pvt++);
-
- resp->flags |= NETRXF_extra_info;
-
- gso->u.gso.size = netbk->meta[npo.meta_cons].gso_size;
- gso->u.gso.type = XEN_NETIF_GSO_TYPE_TCPV4;
- gso->u.gso.pad = 0;
- gso->u.gso.features = 0;
-
- gso->type = XEN_NETIF_EXTRA_TYPE_GSO;
- gso->flags = 0;
- }
-
- netbk_add_frag_responses(netif, status,
- netbk->meta + npo.meta_cons + 1,
- sco->meta_slots_used);
-
- RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&netif->rx, ret);
- irq = netif->irq;
- if (ret && !netbk->rx_notify[irq]) {
- netbk->rx_notify[irq] = 1;
- netbk->notify_list[notify_nr++] = irq;
- }
-
- if (netif_queue_stopped(netif->dev) &&
- netif_schedulable(netif) &&
- !netbk_queue_full(netif))
- netif_wake_queue(netif->dev);
-
- netif_put(netif);
- npo.meta_cons += sco->meta_slots_used;
- dev_kfree_skb(skb);
- }
-
- while (notify_nr != 0) {
- irq = netbk->notify_list[--notify_nr];
- netbk->rx_notify[irq] = 0;
- notify_remote_via_irq(irq);
- }
-
- /* More work to do? */
- if (!skb_queue_empty(&netbk->rx_queue) &&
- !timer_pending(&netbk->net_timer))
- xen_netbk_bh_handler(netbk, 1);
-}
-
-static void net_alarm(unsigned long data)
-{
- struct xen_netbk *netbk = (struct xen_netbk *)data;
- xen_netbk_bh_handler(netbk, 1);
-}
-
-static void netbk_tx_pending_timeout(unsigned long data)
-{
- struct xen_netbk *netbk = (struct xen_netbk *)data;
- xen_netbk_bh_handler(netbk, 0);
-}
-
-struct net_device_stats *netif_be_get_stats(struct net_device *dev)
-{
- struct xen_netif *netif = netdev_priv(dev);
- return &netif->stats;
-}
-
-static int __on_net_schedule_list(struct xen_netif *netif)
-{
- return !list_empty(&netif->list);
-}
-
-/* Must be called with net_schedule_list_lock held */
-static void remove_from_net_schedule_list(struct xen_netif *netif)
-{
- if (likely(__on_net_schedule_list(netif))) {
- list_del_init(&netif->list);
- netif_put(netif);
- }
-}
-
-static struct xen_netif *poll_net_schedule_list(struct xen_netbk *netbk)
-{
- struct xen_netif *netif = NULL;
-
- spin_lock_irq(&netbk->net_schedule_list_lock);
- if (list_empty(&netbk->net_schedule_list))
- goto out;
-
- netif = list_first_entry(&netbk->net_schedule_list,
- struct xen_netif, list);
- if (!netif)
- goto out;
-
- netif_get(netif);
-
- remove_from_net_schedule_list(netif);
-out:
- spin_unlock_irq(&netbk->net_schedule_list_lock);
- return netif;
-}
-
-static void add_to_net_schedule_list_tail(struct xen_netif *netif)
-{
- unsigned long flags;
-
- struct xen_netbk *netbk = &xen_netbk[netif->group];
- if (__on_net_schedule_list(netif))
- return;
-
- spin_lock_irqsave(&netbk->net_schedule_list_lock, flags);
- if (!__on_net_schedule_list(netif) &&
- likely(netif_schedulable(netif))) {
- list_add_tail(&netif->list, &netbk->net_schedule_list);
- netif_get(netif);
- }
- spin_unlock_irqrestore(&netbk->net_schedule_list_lock, flags);
-}
-
-void netif_schedule_work(struct xen_netif *netif)
-{
- struct xen_netbk *netbk = &xen_netbk[netif->group];
- int more_to_do;
-
- RING_FINAL_CHECK_FOR_REQUESTS(&netif->tx, more_to_do);
-
- if (more_to_do) {
- add_to_net_schedule_list_tail(netif);
- maybe_schedule_tx_action(netbk);
- }
-}
-
-void netif_deschedule_work(struct xen_netif *netif)
-{
- struct xen_netbk *netbk = &xen_netbk[netif->group];
- spin_lock_irq(&netbk->net_schedule_list_lock);
- remove_from_net_schedule_list(netif);
- spin_unlock_irq(&netbk->net_schedule_list_lock);
-}
-
-
-static void tx_add_credit(struct xen_netif *netif)
-{
- unsigned long max_burst, max_credit;
-
- /*
- * Allow a burst big enough to transmit a jumbo packet of up to 128kB.
- * Otherwise the interface can seize up due to insufficient credit.
- */
- max_burst = RING_GET_REQUEST(&netif->tx, netif->tx.req_cons)->size;
- max_burst = min(max_burst, 131072UL);
- max_burst = max(max_burst, netif->credit_bytes);
-
- /* Take care that adding a new chunk of credit doesn't wrap to zero. */
- max_credit = netif->remaining_credit + netif->credit_bytes;
- if (max_credit < netif->remaining_credit)
- max_credit = ULONG_MAX; /* wrapped: clamp to ULONG_MAX */
-
- netif->remaining_credit = min(max_credit, max_burst);
-}
-
-static void tx_credit_callback(unsigned long data)
-{
- struct xen_netif *netif = (struct xen_netif *)data;
- tx_add_credit(netif);
- netif_schedule_work(netif);
-}
-
-static inline int copy_pending_req(struct xen_netbk *netbk,
- pending_ring_idx_t pending_idx)
-{
- return gnttab_copy_grant_page(
- netbk->grant_tx_handle[pending_idx],
- &netbk->mmap_pages[pending_idx]);
-}
-
-static inline void net_tx_action_dealloc(struct xen_netbk *netbk)
-{
- struct netbk_tx_pending_inuse *inuse, *n;
- struct gnttab_unmap_grant_ref *gop;
- u16 pending_idx;
- pending_ring_idx_t dc, dp;
- struct xen_netif *netif;
- int ret;
- LIST_HEAD(list);
-
- dc = netbk->dealloc_cons;
- gop = netbk->tx_unmap_ops;
-
- /* Free up any grants we have finished using. */
- do {
- dp = netbk->dealloc_prod;
-
- /* Ensure we see all indices enqueued by netif_idx_release(). */
- smp_rmb();
-
- while (dc != dp) {
- unsigned long pfn;
- struct netbk_tx_pending_inuse *pending_inuse =
- netbk->pending_inuse;
-
- pending_idx = netbk->dealloc_ring[pending_index(dc++)];
- list_move_tail(&pending_inuse[pending_idx].list, &list);
-
- pfn = idx_to_pfn(netbk, pending_idx);
- /* Already unmapped? */
- if (!phys_to_machine_mapping_valid(pfn))
- continue;
-
- gnttab_set_unmap_op(gop,
- idx_to_kaddr(netbk, pending_idx),
- GNTMAP_host_map,
- netbk->grant_tx_handle[pending_idx]);
- gop++;
- }
-
- } while (dp != netbk->dealloc_prod);
-
- netbk->dealloc_cons = dc;
-
- ret = HYPERVISOR_grant_table_op(
- GNTTABOP_unmap_grant_ref, netbk->tx_unmap_ops,
- gop - netbk->tx_unmap_ops);
- BUG_ON(ret);
-
- /*
- * Copy any entries that have been pending for too long
- */
- if (netbk_copy_skb_mode == NETBK_DELAYED_COPY_SKB &&
- !list_empty(&netbk->pending_inuse_head)) {
- list_for_each_entry_safe(inuse, n,
- &netbk->pending_inuse_head, list) {
- struct pending_tx_info *pending_tx_info;
- pending_tx_info = netbk->pending_tx_info;
-
- if (time_after(inuse->alloc_time + HZ / 2, jiffies))
- break;
-
- pending_idx = inuse - netbk->pending_inuse;
-
- pending_tx_info[pending_idx].netif->nr_copied_skbs++;
-
- switch (copy_pending_req(netbk, pending_idx)) {
- case 0:
- list_move_tail(&inuse->list, &list);
- continue;
- case -EBUSY:
- list_del_init(&inuse->list);
- continue;
- case -ENOENT:
- continue;
- }
-
- break;
- }
- }
-
- list_for_each_entry_safe(inuse, n, &list, list) {
- struct pending_tx_info *pending_tx_info;
- pending_ring_idx_t index;
-
- pending_tx_info = netbk->pending_tx_info;
- pending_idx = inuse - netbk->pending_inuse;
-
- netif = pending_tx_info[pending_idx].netif;
-
- make_tx_response(netif, &pending_tx_info[pending_idx].req,
- NETIF_RSP_OKAY);
-
- /* Ready for next use. */
- gnttab_reset_grant_page(netbk->mmap_pages[pending_idx]);
-
- index = pending_index(netbk->pending_prod++);
- netbk->pending_ring[index] = pending_idx;
-
- netif_put(netif);
-
- list_del_init(&inuse->list);
- }
-}
-
-static void netbk_tx_err(struct xen_netif *netif,
- struct xen_netif_tx_request *txp, RING_IDX end)
-{
- RING_IDX cons = netif->tx.req_cons;
-
- do {
- make_tx_response(netif, txp, NETIF_RSP_ERROR);
- if (cons >= end)
- break;
- txp = RING_GET_REQUEST(&netif->tx, cons++);
- } while (1);
- netif->tx.req_cons = cons;
- netif_schedule_work(netif);
- netif_put(netif);
-}
-
-static int netbk_count_requests(struct xen_netif *netif,
- struct xen_netif_tx_request *first,
- struct xen_netif_tx_request *txp,
- int work_to_do)
-{
- RING_IDX cons = netif->tx.req_cons;
- int frags = 0;
-
- if (!(first->flags & NETTXF_more_data))
- return 0;
-
- do {
- if (frags >= work_to_do) {
- pr_debug("Need more frags\n");
- return -frags;
- }
-
- if (unlikely(frags >= MAX_SKB_FRAGS)) {
- pr_debug("Too many frags\n");
- return -frags;
- }
-
- memcpy(txp, RING_GET_REQUEST(&netif->tx, cons + frags),
- sizeof(*txp));
- if (txp->size > first->size) {
- pr_debug("Frags galore\n");
- return -frags;
- }
-
- first->size -= txp->size;
- frags++;
-
- if (unlikely((txp->offset + txp->size) > PAGE_SIZE)) {
- pr_debug("txp->offset: %x, size: %u\n",
- txp->offset, txp->size);
- return -frags;
- }
- } while ((txp++)->flags & NETTXF_more_data);
-
- return frags;
-}
-
-static struct gnttab_map_grant_ref *netbk_get_requests(struct xen_netbk *netbk,
- struct xen_netif *netif,
- struct sk_buff *skb,
- struct xen_netif_tx_request *txp,
- struct gnttab_map_grant_ref *mop)
-{
- struct skb_shared_info *shinfo = skb_shinfo(skb);
- skb_frag_t *frags = shinfo->frags;
- unsigned long pending_idx = *((u16 *)skb->data);
- int i, start;
-
- /* Skip first skb fragment if it is on same page as header fragment. */
- start = ((unsigned long)shinfo->frags[0].page == pending_idx);
-
- for (i = start; i < shinfo->nr_frags; i++, txp++) {
- pending_ring_idx_t index;
- struct pending_tx_info *pending_tx_info =
- netbk->pending_tx_info;
-
- index = pending_index(netbk->pending_cons++);
- pending_idx = netbk->pending_ring[index];
-
- gnttab_set_map_op(mop++, idx_to_kaddr(netbk, pending_idx),
- GNTMAP_host_map | GNTMAP_readonly,
- txp->gref, netif->domid);
-
- memcpy(&pending_tx_info[pending_idx].req, txp, sizeof(*txp));
- netif_get(netif);
- pending_tx_info[pending_idx].netif = netif;
- frags[i].page = (void *)pending_idx;
- }
-
- return mop;
-}
-
-static int netbk_tx_check_mop(struct xen_netbk *netbk,
- struct sk_buff *skb,
- struct gnttab_map_grant_ref **mopp)
-{
- struct gnttab_map_grant_ref *mop = *mopp;
- int pending_idx = *((u16 *)skb->data);
- struct pending_tx_info *pending_tx_info = netbk->pending_tx_info;
- struct xen_netif *netif = pending_tx_info[pending_idx].netif;
- struct xen_netif_tx_request *txp;
- struct skb_shared_info *shinfo = skb_shinfo(skb);
- int nr_frags = shinfo->nr_frags;
- int i, err, start;
-
- /* Check status of header. */
- err = mop->status;
- if (unlikely(err)) {
- pending_ring_idx_t index;
- index = pending_index(netbk->pending_prod++);
- txp = &pending_tx_info[pending_idx].req;
- make_tx_response(netif, txp, NETIF_RSP_ERROR);
- netbk->pending_ring[index] = pending_idx;
- netif_put(netif);
- } else {
- set_phys_to_machine(
- __pa(idx_to_kaddr(netbk, pending_idx)) >> PAGE_SHIFT,
- FOREIGN_FRAME(mop->dev_bus_addr >> PAGE_SHIFT));
- netbk->grant_tx_handle[pending_idx] = mop->handle;
- }
-
- /* Skip first skb fragment if it is on same page as header fragment. */
- start = ((unsigned long)shinfo->frags[0].page == pending_idx);
-
- for (i = start; i < nr_frags; i++) {
- int j, newerr;
- pending_ring_idx_t index;
-
- pending_idx = (unsigned long)shinfo->frags[i].page;
-
- /* Check error status: if okay then remember grant handle. */
- newerr = (++mop)->status;
- if (likely(!newerr)) {
- unsigned long addr;
- addr = idx_to_kaddr(netbk, pending_idx);
- set_phys_to_machine(
- __pa(addr)>>PAGE_SHIFT,
- FOREIGN_FRAME(mop->dev_bus_addr>>PAGE_SHIFT));
- netbk->grant_tx_handle[pending_idx] = mop->handle;
- /* Had a previous error? Invalidate this fragment. */
- if (unlikely(err))
- netif_idx_release(netbk, pending_idx);
- continue;
- }
-
- /* Error on this fragment: respond to client with an error. */
- txp = &netbk->pending_tx_info[pending_idx].req;
- make_tx_response(netif, txp, NETIF_RSP_ERROR);
- index = pending_index(netbk->pending_prod++);
- netbk->pending_ring[index] = pending_idx;
- netif_put(netif);
-
- /* Not the first error? Preceding frags already invalidated. */
- if (err)
- continue;
-
- /* First error: invalidate header and preceding fragments. */
- pending_idx = *((u16 *)skb->data);
- netif_idx_release(netbk, pending_idx);
- for (j = start; j < i; j++) {
- pending_idx = (unsigned long)shinfo->frags[i].page;
- netif_idx_release(netbk, pending_idx);
- }
-
- /* Remember the error: invalidate all subsequent fragments. */
- err = newerr;
- }
-
- *mopp = mop + 1;
- return err;
-}
-
-static void netbk_fill_frags(struct xen_netbk *netbk, struct sk_buff *skb)
-{
- struct skb_shared_info *shinfo = skb_shinfo(skb);
- int nr_frags = shinfo->nr_frags;
- int i;
-
- for (i = 0; i < nr_frags; i++) {
- skb_frag_t *frag = shinfo->frags + i;
- struct xen_netif_tx_request *txp;
- unsigned long pending_idx;
-
- pending_idx = (unsigned long)frag->page;
-
- netbk->pending_inuse[pending_idx].alloc_time = jiffies;
- list_add_tail(&netbk->pending_inuse[pending_idx].list,
- &netbk->pending_inuse_head);
-
- txp = &netbk->pending_tx_info[pending_idx].req;
- frag->page = virt_to_page(idx_to_kaddr(netbk, pending_idx));
- frag->size = txp->size;
- frag->page_offset = txp->offset;
-
- skb->len += txp->size;
- skb->data_len += txp->size;
- skb->truesize += txp->size;
- }
-}
-
-int netbk_get_extras(struct xen_netif *netif,
- struct xen_netif_extra_info *extras,
- int work_to_do)
-{
- struct xen_netif_extra_info extra;
- RING_IDX cons = netif->tx.req_cons;
-
- do {
- if (unlikely(work_to_do-- <= 0)) {
- pr_debug("Missing extra info\n");
- return -EBADR;
- }
-
- memcpy(&extra, RING_GET_REQUEST(&netif->tx, cons),
- sizeof(extra));
- if (unlikely(!extra.type ||
- extra.type >= XEN_NETIF_EXTRA_TYPE_MAX)) {
- netif->tx.req_cons = ++cons;
- pr_debug("Invalid extra type: %d\n", extra.type);
- return -EINVAL;
- }
-
- memcpy(&extras[extra.type - 1], &extra, sizeof(extra));
- netif->tx.req_cons = ++cons;
- } while (extra.flags & XEN_NETIF_EXTRA_FLAG_MORE);
-
- return work_to_do;
-}
-
-static int netbk_set_skb_gso(struct sk_buff *skb,
- struct xen_netif_extra_info *gso)
-{
- if (!gso->u.gso.size) {
- pr_debug("GSO size must not be zero.\n");
- return -EINVAL;
- }
-
- /* Currently only TCPv4 S.O. is supported. */
- if (gso->u.gso.type != XEN_NETIF_GSO_TYPE_TCPV4) {
- pr_debug("Bad GSO type %d.\n", gso->u.gso.type);
- return -EINVAL;
- }
-
- skb_shinfo(skb)->gso_size = gso->u.gso.size;
- skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
-
- /* Header must be checked, and gso_segs computed. */
- skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
- skb_shinfo(skb)->gso_segs = 0;
-
- return 0;
-}
-
-static int checksum_setup(struct xen_netif *netif, struct sk_buff *skb)
-{
- struct iphdr *iph;
- unsigned char *th;
- int err = -EPROTO;
- int recalculate_partial_csum = 0;
-
- /*
- * A GSO SKB must be CHECKSUM_PARTIAL. However some buggy
- * peers can fail to set NETRXF_csum_blank when sending a GSO
- * frame. In this case force the SKB to CHECKSUM_PARTIAL and
- * recalculate the partial checksum.
- */
- if (skb->ip_summed != CHECKSUM_PARTIAL && skb_is_gso(skb)) {
- netif->rx_gso_checksum_fixup++;
- skb->ip_summed = CHECKSUM_PARTIAL;
- recalculate_partial_csum = 1;
- }
-
- /* A non-CHECKSUM_PARTIAL SKB does not require setup. */
- if (skb->ip_summed != CHECKSUM_PARTIAL)
- return 0;
-
- if (skb->protocol != htons(ETH_P_IP))
- goto out;
-
- iph = (void *)skb->data;
- th = skb->data + 4 * iph->ihl;
- if (th >= skb_tail_pointer(skb))
- goto out;
-
- skb->csum_start = th - skb->head;
- switch (iph->protocol) {
- case IPPROTO_TCP:
- skb->csum_offset = offsetof(struct tcphdr, check);
-
- if (recalculate_partial_csum) {
- struct tcphdr *tcph = (struct tcphdr *)th;
- tcph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr,
- skb->len - iph->ihl*4,
- IPPROTO_TCP, 0);
- }
- break;
- case IPPROTO_UDP:
- skb->csum_offset = offsetof(struct udphdr, check);
-
- if (recalculate_partial_csum) {
- struct udphdr *udph = (struct udphdr *)th;
- udph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr,
- skb->len - iph->ihl*4,
- IPPROTO_UDP, 0);
- }
- break;
- default:
- if (net_ratelimit())
- printk(KERN_ERR "Attempting to checksum a non-"
- "TCP/UDP packet, dropping a protocol"
- " %d packet", iph->protocol);
- goto out;
- }
-
- if ((th + skb->csum_offset + 2) > skb_tail_pointer(skb))
- goto out;
-
- err = 0;
-
-out:
- return err;
-}
-
-static bool tx_credit_exceeded(struct xen_netif *netif, unsigned size)
-{
- unsigned long now = jiffies;
- unsigned long next_credit =
- netif->credit_timeout.expires +
- msecs_to_jiffies(netif->credit_usec / 1000);
-
- /* Timer could already be pending in rare cases. */
- if (timer_pending(&netif->credit_timeout))
- return true;
-
- /* Passed the point where we can replenish credit? */
- if (time_after_eq(now, next_credit)) {
- netif->credit_timeout.expires = now;
- tx_add_credit(netif);
- }
-
- /* Still too big to send right now? Set a callback. */
- if (size > netif->remaining_credit) {
- netif->credit_timeout.data =
- (unsigned long)netif;
- netif->credit_timeout.function =
- tx_credit_callback;
- mod_timer(&netif->credit_timeout,
- next_credit);
-
- return true;
- }
-
- return false;
-}
-
-static unsigned net_tx_build_mops(struct xen_netbk *netbk)
-{
- struct gnttab_map_grant_ref *mop;
- struct sk_buff *skb;
- int ret;
-
- mop = netbk->tx_map_ops;
- while (((nr_pending_reqs(netbk) + MAX_SKB_FRAGS) < MAX_PENDING_REQS) &&
- !list_empty(&netbk->net_schedule_list)) {
- struct xen_netif *netif;
- struct xen_netif_tx_request txreq;
- struct xen_netif_tx_request txfrags[MAX_SKB_FRAGS];
- struct xen_netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX-1];
- u16 pending_idx;
- RING_IDX idx;
- int work_to_do;
- unsigned int data_len;
- pending_ring_idx_t index;
-
- /* Get a netif from the list with work to do. */
- netif = poll_net_schedule_list(netbk);
- if (!netif)
- continue;
-
- RING_FINAL_CHECK_FOR_REQUESTS(&netif->tx, work_to_do);
- if (!work_to_do) {
- netif_put(netif);
- continue;
- }
-
- idx = netif->tx.req_cons;
- rmb(); /* Ensure that we see the request before we copy it. */
- memcpy(&txreq, RING_GET_REQUEST(&netif->tx, idx), sizeof(txreq));
-
- /* Credit-based scheduling. */
- if (txreq.size > netif->remaining_credit &&
- tx_credit_exceeded(netif, txreq.size)) {
- netif_put(netif);
- continue;
- }
-
- netif->remaining_credit -= txreq.size;
-
- work_to_do--;
- netif->tx.req_cons = ++idx;
-
- memset(extras, 0, sizeof(extras));
- if (txreq.flags & NETTXF_extra_info) {
- work_to_do = netbk_get_extras(netif, extras,
- work_to_do);
- idx = netif->tx.req_cons;
- if (unlikely(work_to_do < 0)) {
- netbk_tx_err(netif, &txreq, idx);
- continue;
- }
- }
-
- ret = netbk_count_requests(netif, &txreq, txfrags, work_to_do);
- if (unlikely(ret < 0)) {
- netbk_tx_err(netif, &txreq, idx - ret);
- continue;
- }
- idx += ret;
-
- if (unlikely(txreq.size < ETH_HLEN)) {
- pr_debug("Bad packet size: %d\n", txreq.size);
- netbk_tx_err(netif, &txreq, idx);
- continue;
- }
-
- /* No crossing a page as the payload mustn't fragment. */
- if (unlikely((txreq.offset + txreq.size) > PAGE_SIZE)) {
- pr_debug("txreq.offset: %x, size: %u, end: %lu\n",
- txreq.offset, txreq.size,
- (txreq.offset&~PAGE_MASK) + txreq.size);
- netbk_tx_err(netif, &txreq, idx);
- continue;
- }
-
- index = pending_index(netbk->pending_cons);
- pending_idx = netbk->pending_ring[index];
-
- data_len = (txreq.size > PKT_PROT_LEN &&
- ret < MAX_SKB_FRAGS) ?
- PKT_PROT_LEN : txreq.size;
-
- skb = alloc_skb(data_len + NET_SKB_PAD + NET_IP_ALIGN,
- GFP_ATOMIC | __GFP_NOWARN);
- if (unlikely(skb == NULL)) {
- pr_debug("Can't allocate a skb in start_xmit.\n");
- netbk_tx_err(netif, &txreq, idx);
- break;
- }
-
- /* Packets passed to netif_rx() must have some headroom. */
- skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN);
-
- if (extras[XEN_NETIF_EXTRA_TYPE_GSO - 1].type) {
- struct xen_netif_extra_info *gso;
- gso = &extras[XEN_NETIF_EXTRA_TYPE_GSO - 1];
-
- if (netbk_set_skb_gso(skb, gso)) {
- kfree_skb(skb);
- netbk_tx_err(netif, &txreq, idx);
- continue;
- }
- }
-
- gnttab_set_map_op(mop, idx_to_kaddr(netbk, pending_idx),
- GNTMAP_host_map | GNTMAP_readonly,
- txreq.gref, netif->domid);
- mop++;
-
- memcpy(&netbk->pending_tx_info[pending_idx].req,
- &txreq, sizeof(txreq));
- netbk->pending_tx_info[pending_idx].netif = netif;
- *((u16 *)skb->data) = pending_idx;
-
- __skb_put(skb, data_len);
-
- skb_shinfo(skb)->nr_frags = ret;
- if (data_len < txreq.size) {
- skb_shinfo(skb)->nr_frags++;
- skb_shinfo(skb)->frags[0].page =
- (void *)(unsigned long)pending_idx;
- } else {
- /* Discriminate from any valid pending_idx value. */
- skb_shinfo(skb)->frags[0].page = (void *)~0UL;
- }
-
- __skb_queue_tail(&netbk->tx_queue, skb);
-
- netbk->pending_cons++;
-
- mop = netbk_get_requests(netbk, netif, skb, txfrags, mop);
-
- netif->tx.req_cons = idx;
- netif_schedule_work(netif);
-
- if ((mop - netbk->tx_map_ops) >= ARRAY_SIZE(netbk->tx_map_ops))
- break;
- }
-
- return mop - netbk->tx_map_ops;
-}
-
-static void net_tx_submit(struct xen_netbk *netbk)
-{
- struct gnttab_map_grant_ref *mop;
- struct sk_buff *skb;
-
- mop = netbk->tx_map_ops;
- while ((skb = __skb_dequeue(&netbk->tx_queue)) != NULL) {
- struct xen_netif_tx_request *txp;
- struct xen_netif *netif;
- u16 pending_idx;
- unsigned data_len;
-
- pending_idx = *((u16 *)skb->data);
- netif = netbk->pending_tx_info[pending_idx].netif;
- txp = &netbk->pending_tx_info[pending_idx].req;
-
- /* Check the remap error code. */
- if (unlikely(netbk_tx_check_mop(netbk, skb, &mop))) {
- pr_debug("netback grant failed.\n");
- skb_shinfo(skb)->nr_frags = 0;
- kfree_skb(skb);
- continue;
- }
-
- data_len = skb->len;
- memcpy(skb->data,
- (void *)(idx_to_kaddr(netbk, pending_idx)|txp->offset),
- data_len);
- if (data_len < txp->size) {
- /* Append the packet payload as a fragment. */
- txp->offset += data_len;
- txp->size -= data_len;
- } else {
- /* Schedule a response immediately. */
- netif_idx_release(netbk, pending_idx);
- }
-
- if (txp->flags & NETTXF_csum_blank)
- skb->ip_summed = CHECKSUM_PARTIAL;
- else if (txp->flags & NETTXF_data_validated)
- skb->ip_summed = CHECKSUM_UNNECESSARY;
-
- netbk_fill_frags(netbk, skb);
-
- /*
- * If the initial fragment was < PKT_PROT_LEN then
- * pull through some bytes from the other fragments to
- * increase the linear region to PKT_PROT_LEN bytes.
- */
- if (skb_headlen(skb) < PKT_PROT_LEN && skb_is_nonlinear(skb)) {
- int target = min_t(int, skb->len, PKT_PROT_LEN);
- __pskb_pull_tail(skb, target - skb_headlen(skb));
- }
-
- skb->dev = netif->dev;
- skb->protocol = eth_type_trans(skb, skb->dev);
-
- if (checksum_setup(netif, skb)) {
- pr_debug("Can't setup checksum in net_tx_action\n");
- kfree_skb(skb);
- continue;
- }
-
- if (unlikely(netbk_copy_skb_mode == NETBK_ALWAYS_COPY_SKB) &&
- unlikely(skb_linearize(skb))) {
- pr_debug("Can't linearize skb in net_tx_action.\n");
- kfree_skb(skb);
- continue;
- }
-
- netif->stats.rx_bytes += skb->len;
- netif->stats.rx_packets++;
-
- netif_rx_ni(skb);
- netif->dev->last_rx = jiffies;
- }
-}
-
-/* Called after netfront has transmitted */
-static void net_tx_action(unsigned long data)
-{
- struct xen_netbk *netbk = (struct xen_netbk *)data;
- unsigned nr_mops;
- int ret;
-
- net_tx_action_dealloc(netbk);
-
- nr_mops = net_tx_build_mops(netbk);
-
- if (nr_mops == 0)
- goto out;
-
- ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref,
- netbk->tx_map_ops, nr_mops);
- BUG_ON(ret);
-
- net_tx_submit(netbk);
-out:
- if (netbk_copy_skb_mode == NETBK_DELAYED_COPY_SKB &&
- !list_empty(&netbk->pending_inuse_head)) {
- struct netbk_tx_pending_inuse *oldest;
-
- oldest = list_entry(netbk->pending_inuse_head.next,
- struct netbk_tx_pending_inuse, list);
- mod_timer(&netbk->netbk_tx_pending_timer,
- oldest->alloc_time + HZ);
- }
-}
-
-static void netif_idx_release(struct xen_netbk *netbk, u16 pending_idx)
-{
- static DEFINE_SPINLOCK(_lock);
- unsigned long flags;
- pending_ring_idx_t index;
-
- spin_lock_irqsave(&_lock, flags);
- index = pending_index(netbk->dealloc_prod);
- netbk->dealloc_ring[index] = pending_idx;
- /* Sync with net_tx_action_dealloc: insert idx /then/ incr producer. */
- smp_wmb();
- netbk->dealloc_prod++;
- spin_unlock_irqrestore(&_lock, flags);
-
- xen_netbk_bh_handler(netbk, 0);
-}
-
-static void netif_page_release(struct page *page, unsigned int order)
-{
- unsigned int group, idx;
- int foreign = netif_get_page_ext(page, &group, &idx);
-
- BUG_ON(!foreign);
- BUG_ON(order);
-
- netif_idx_release(&xen_netbk[group], idx);
-}
-
-irqreturn_t netif_be_int(int irq, void *dev_id)
-{
- struct xen_netif *netif = dev_id;
- struct xen_netbk *netbk;
-
- if (netif->group == -1)
- return IRQ_NONE;
-
- netbk = &xen_netbk[netif->group];
-
- add_to_net_schedule_list_tail(netif);
- maybe_schedule_tx_action(netbk);
-
- if (netif_schedulable(netif) && !netbk_queue_full(netif))
- netif_wake_queue(netif->dev);
-
- return IRQ_HANDLED;
-}
-
-static void make_tx_response(struct xen_netif *netif,
- struct xen_netif_tx_request *txp,
- s8 st)
-{
- RING_IDX i = netif->tx.rsp_prod_pvt;
- struct xen_netif_tx_response *resp;
- int notify;
-
- resp = RING_GET_RESPONSE(&netif->tx, i);
- resp->id = txp->id;
- resp->status = st;
-
- if (txp->flags & NETTXF_extra_info)
- RING_GET_RESPONSE(&netif->tx, ++i)->status = NETIF_RSP_NULL;
-
- netif->tx.rsp_prod_pvt = ++i;
- RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&netif->tx, notify);
- if (notify)
- notify_remote_via_irq(netif->irq);
-}
-
-static struct xen_netif_rx_response *make_rx_response(struct xen_netif *netif,
- u16 id,
- s8 st,
- u16 offset,
- u16 size,
- u16 flags)
-{
- RING_IDX i = netif->rx.rsp_prod_pvt;
- struct xen_netif_rx_response *resp;
-
- resp = RING_GET_RESPONSE(&netif->rx, i);
- resp->offset = offset;
- resp->flags = flags;
- resp->id = id;
- resp->status = (s16)size;
- if (st < 0)
- resp->status = (s16)st;
-
- netif->rx.rsp_prod_pvt = ++i;
-
- return resp;
-}
-
-#ifdef NETBE_DEBUG_INTERRUPT
-static irqreturn_t netif_be_dbg(int irq, void *dev_id, struct pt_regs *regs)
-{
- struct list_head *ent;
- struct xen_netif *netif;
- int i = 0;
- int group = 0;
-
- printk(KERN_ALERT "netif_schedule_list:\n");
-
- for (group = 0; group < xen_netbk_group_nr; group++) {
- struct xen_netbk *netbk = &xen_netbk[group];
- spin_lock_irq(&netbk->net_schedule_list_lock);
- printk(KERN_ALERT "xen_netback group number: %d\n", group);
- list_for_each(ent, &netbk->net_schedule_list) {
- netif = list_entry(ent, struct xen_netif, list);
- printk(KERN_ALERT " %d: private(rx_req_cons=%08x "
- "rx_resp_prod=%08x\n",
- i, netif->rx.req_cons, netif->rx.rsp_prod_pvt);
- printk(KERN_ALERT
- " tx_req_cons=%08x, tx_resp_prod=%08x)\n",
- netif->tx.req_cons, netif->tx.rsp_prod_pvt);
- printk(KERN_ALERT
- " shared(rx_req_prod=%08x "
- "rx_resp_prod=%08x\n",
- netif->rx.sring->req_prod,
- netif->rx.sring->rsp_prod);
- printk(KERN_ALERT
- " rx_event=%08x, tx_req_prod=%08x\n",
- netif->rx.sring->rsp_event,
- netif->tx.sring->req_prod);
- printk(KERN_ALERT
- " tx_resp_prod=%08x, tx_event=%08x)\n",
- netif->tx.sring->rsp_prod,
- netif->tx.sring->rsp_event);
- i++;
- }
- spin_unlock_irq(&netbk->net_schedule_list_lock);
- }
-
- printk(KERN_ALERT " ** End of netif_schedule_list **\n");
-
- return IRQ_HANDLED;
-}
-#endif
-
-static inline int rx_work_todo(struct xen_netbk *netbk)
-{
- return !skb_queue_empty(&netbk->rx_queue);
-}
-
-static inline int tx_work_todo(struct xen_netbk *netbk)
-{
- if (netbk->dealloc_cons != netbk->dealloc_prod)
- return 1;
-
- if (netbk_copy_skb_mode == NETBK_DELAYED_COPY_SKB &&
- !list_empty(&netbk->pending_inuse_head))
- return 1;
-
- if (((nr_pending_reqs(netbk) + MAX_SKB_FRAGS) < MAX_PENDING_REQS) &&
- !list_empty(&netbk->net_schedule_list))
- return 1;
-
- return 0;
-}
-
-static int netbk_action_thread(void *data)
-{
- struct xen_netbk *netbk = (struct xen_netbk *)data;
- while (!kthread_should_stop()) {
- wait_event_interruptible(netbk->kthread.netbk_action_wq,
- rx_work_todo(netbk)
- || tx_work_todo(netbk)
- || kthread_should_stop());
- cond_resched();
-
- if (kthread_should_stop())
- break;
-
- if (rx_work_todo(netbk))
- net_rx_action((unsigned long)netbk);
-
- if (tx_work_todo(netbk))
- net_tx_action((unsigned long)netbk);
- }
-
- return 0;
-}
-
-static int __init netback_init(void)
-{
- int i;
- struct page *page;
- int rc = 0;
- int group;
-
- if (!xen_pv_domain())
- return -ENODEV;
-
- xen_netbk_group_nr = num_online_cpus();
- xen_netbk = vmalloc(sizeof(struct xen_netbk) * xen_netbk_group_nr);
- if (!xen_netbk) {
- printk(KERN_ALERT "%s: out of memory\n", __func__);
- return -ENOMEM;
- }
- memset(xen_netbk, 0, sizeof(struct xen_netbk) * xen_netbk_group_nr);
-
- for (group = 0; group < xen_netbk_group_nr; group++) {
- struct xen_netbk *netbk = &xen_netbk[group];
- skb_queue_head_init(&netbk->rx_queue);
- skb_queue_head_init(&netbk->tx_queue);
-
- init_timer(&netbk->net_timer);
- netbk->net_timer.data = (unsigned long)netbk;
- netbk->net_timer.function = net_alarm;
-
- init_timer(&netbk->netbk_tx_pending_timer);
- netbk->netbk_tx_pending_timer.data = (unsigned long)netbk;
- netbk->netbk_tx_pending_timer.function =
- netbk_tx_pending_timeout;
-
- netbk->mmap_pages =
- alloc_empty_pages_and_pagevec(MAX_PENDING_REQS);
- if (!netbk->mmap_pages) {
- printk(KERN_ALERT "%s: out of memory\n", __func__);
- del_timer(&netbk->netbk_tx_pending_timer);
- del_timer(&netbk->net_timer);
- rc = -ENOMEM;
- goto failed_init;
- }
-
- for (i = 0; i < MAX_PENDING_REQS; i++) {
- page = netbk->mmap_pages[i];
- SetPageForeign(page, netif_page_release);
- netif_set_page_ext(page, group, i);
- INIT_LIST_HEAD(&netbk->pending_inuse[i].list);
- }
-
- netbk->pending_cons = 0;
- netbk->pending_prod = MAX_PENDING_REQS;
- for (i = 0; i < MAX_PENDING_REQS; i++)
- netbk->pending_ring[i] = i;
-
- if (MODPARM_netback_kthread) {
- init_waitqueue_head(&netbk->kthread.netbk_action_wq);
- netbk->kthread.task =
- kthread_create(netbk_action_thread,
- (void *)netbk,
- "netback/%u", group);
-
- if (!IS_ERR(netbk->kthread.task)) {
- kthread_bind(netbk->kthread.task, group);
- } else {
- printk(KERN_ALERT
- "kthread_run() fails at netback\n");
- free_empty_pages_and_pagevec(netbk->mmap_pages,
- MAX_PENDING_REQS);
- del_timer(&netbk->netbk_tx_pending_timer);
- del_timer(&netbk->net_timer);
- rc = PTR_ERR(netbk->kthread.task);
- goto failed_init;
- }
- } else {
- tasklet_init(&netbk->tasklet.net_tx_tasklet,
- net_tx_action,
- (unsigned long)netbk);
- tasklet_init(&netbk->tasklet.net_rx_tasklet,
- net_rx_action,
- (unsigned long)netbk);
- }
-
- INIT_LIST_HEAD(&netbk->pending_inuse_head);
- INIT_LIST_HEAD(&netbk->net_schedule_list);
-
- spin_lock_init(&netbk->net_schedule_list_lock);
-
- atomic_set(&netbk->netfront_count, 0);
-
- if (MODPARM_netback_kthread)
- wake_up_process(netbk->kthread.task);
- }
-
- netbk_copy_skb_mode = NETBK_DONT_COPY_SKB;
- if (MODPARM_copy_skb) {
- if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_and_replace,
- NULL, 0))
- netbk_copy_skb_mode = NETBK_ALWAYS_COPY_SKB;
- else
- netbk_copy_skb_mode = NETBK_DELAYED_COPY_SKB;
- }
-
- rc = netif_xenbus_init();
- if (rc)
- goto failed_init;
-
-#ifdef NETBE_DEBUG_INTERRUPT
- (void)bind_virq_to_irqhandler(VIRQ_DEBUG,
- 0,
- netif_be_dbg,
- IRQF_SHARED,
- "net-be-dbg",
- &netif_be_dbg);
-#endif
-
- return 0;
-
-failed_init:
- for (i = 0; i < group; i++) {
- struct xen_netbk *netbk = &xen_netbk[i];
- free_empty_pages_and_pagevec(netbk->mmap_pages,
- MAX_PENDING_REQS);
- del_timer(&netbk->netbk_tx_pending_timer);
- del_timer(&netbk->net_timer);
- if (MODPARM_netback_kthread)
- kthread_stop(netbk->kthread.task);
- }
- vfree(xen_netbk);
- return rc;
-
-}
-
-module_init(netback_init);
-
-MODULE_LICENSE("Dual BSD/GPL");
diff --git a/drivers/xen/netback/xenbus.c b/drivers/xen/netback/xenbus.c
deleted file mode 100644
index 867dc25..0000000
--- a/drivers/xen/netback/xenbus.c
+++ /dev/null
@@ -1,489 +0,0 @@
-/*
- * Xenbus code for netif backend
- *
- * Copyright (C) 2005 Rusty Russell <rusty@rustcorp.com.au>
- * Copyright (C) 2005 XenSource Ltd
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-*/
-
-#include "common.h"
-
-static int connect_rings(struct backend_info *);
-static void connect(struct backend_info *);
-static void backend_create_netif(struct backend_info *be);
-static void unregister_hotplug_status_watch(struct backend_info *be);
-
-static int netback_remove(struct xenbus_device *dev)
-{
- struct backend_info *be = dev_get_drvdata(&dev->dev);
-
- unregister_hotplug_status_watch(be);
- if (be->netif) {
- kobject_uevent(&dev->dev.kobj, KOBJ_OFFLINE);
- xenbus_rm(XBT_NIL, dev->nodename, "hotplug-status");
- netif_disconnect(be->netif);
- be->netif = NULL;
- }
- kfree(be);
- dev_set_drvdata(&dev->dev, NULL);
- return 0;
-}
-
-
-/**
- * Entry point to this code when a new device is created. Allocate the basic
- * structures and switch to InitWait.
- */
-static int netback_probe(struct xenbus_device *dev,
- const struct xenbus_device_id *id)
-{
- const char *message;
- struct xenbus_transaction xbt;
- int err;
- int sg;
- struct backend_info *be = kzalloc(sizeof(struct backend_info),
- GFP_KERNEL);
- if (!be) {
- xenbus_dev_fatal(dev, -ENOMEM,
- "allocating backend structure");
- return -ENOMEM;
- }
-
- be->dev = dev;
- dev_set_drvdata(&dev->dev, be);
-
- sg = 1;
- if (netbk_copy_skb_mode == NETBK_ALWAYS_COPY_SKB)
- sg = 0;
-
- do {
- err = xenbus_transaction_start(&xbt);
- if (err) {
- xenbus_dev_fatal(dev, err, "starting transaction");
- goto fail;
- }
-
- err = xenbus_printf(xbt, dev->nodename, "feature-sg", "%d", sg);
- if (err) {
- message = "writing feature-sg";
- goto abort_transaction;
- }
-
- err = xenbus_printf(xbt, dev->nodename, "feature-gso-tcpv4",
- "%d", sg);
- if (err) {
- message = "writing feature-gso-tcpv4";
- goto abort_transaction;
- }
-
- /* We support rx-copy path. */
- err = xenbus_printf(xbt, dev->nodename,
- "feature-rx-copy", "%d", 1);
- if (err) {
- message = "writing feature-rx-copy";
- goto abort_transaction;
- }
-
- /*
- * We don't support rx-flip path (except old guests who don't
- * grok this feature flag).
- */
- err = xenbus_printf(xbt, dev->nodename,
- "feature-rx-flip", "%d", 0);
- if (err) {
- message = "writing feature-rx-flip";
- goto abort_transaction;
- }
-
- err = xenbus_transaction_end(xbt, 0);
- } while (err == -EAGAIN);
-
- if (err) {
- xenbus_dev_fatal(dev, err, "completing transaction");
- goto fail;
- }
-
- err = xenbus_switch_state(dev, XenbusStateInitWait);
- if (err)
- goto fail;
-
- /* This kicks hotplug scripts, so do it immediately. */
- backend_create_netif(be);
-
- return 0;
-
-abort_transaction:
- xenbus_transaction_end(xbt, 1);
- xenbus_dev_fatal(dev, err, "%s", message);
-fail:
- pr_debug("failed");
- netback_remove(dev);
- return err;
-}
-
-
-/*
- * Handle the creation of the hotplug script environment. We add the script
- * and vif variables to the environment, for the benefit of the vif-* hotplug
- * scripts.
- */
-static int netback_uevent(struct xenbus_device *xdev,
- struct kobj_uevent_env *env)
-{
- struct backend_info *be = dev_get_drvdata(&xdev->dev);
- char *val;
-
- val = xenbus_read(XBT_NIL, xdev->nodename, "script", NULL);
- if (IS_ERR(val)) {
- int err = PTR_ERR(val);
- xenbus_dev_fatal(xdev, err, "reading script");
- return err;
- } else {
- if (add_uevent_var(env, "script=%s", val)) {
- kfree(val);
- return -ENOMEM;
- }
- kfree(val);
- }
-
- if (!be || !be->netif)
- return 0;
-
- return add_uevent_var(env, "vif=%s", be->netif->dev->name);
-}
-
-
-static void backend_create_netif(struct backend_info *be)
-{
- int err;
- long handle;
- struct xenbus_device *dev = be->dev;
-
- if (be->netif != NULL)
- return;
-
- err = xenbus_scanf(XBT_NIL, dev->nodename, "handle", "%li", &handle);
- if (err != 1) {
- xenbus_dev_fatal(dev, err, "reading handle");
- return;
- }
-
- be->netif = netif_alloc(&dev->dev, dev->otherend_id, handle);
- if (IS_ERR(be->netif)) {
- err = PTR_ERR(be->netif);
- be->netif = NULL;
- xenbus_dev_fatal(dev, err, "creating interface");
- return;
- }
-
- kobject_uevent(&dev->dev.kobj, KOBJ_ONLINE);
-}
-
-
-static void disconnect_backend(struct xenbus_device *dev)
-{
- struct backend_info *be = dev_get_drvdata(&dev->dev);
-
- if (be->netif) {
- xenbus_rm(XBT_NIL, dev->nodename, "hotplug-status");
- netif_disconnect(be->netif);
- be->netif = NULL;
- }
-}
-
-/**
- * Callback received when the frontend's state changes.
- */
-static void frontend_changed(struct xenbus_device *dev,
- enum xenbus_state frontend_state)
-{
- struct backend_info *be = dev_get_drvdata(&dev->dev);
-
- pr_debug("frontend state %s", xenbus_strstate(frontend_state));
-
- be->frontend_state = frontend_state;
-
- switch (frontend_state) {
- case XenbusStateInitialising:
- if (dev->state == XenbusStateClosed) {
- printk(KERN_INFO "%s: %s: prepare for reconnect\n",
- __func__, dev->nodename);
- xenbus_switch_state(dev, XenbusStateInitWait);
- }
- break;
-
- case XenbusStateInitialised:
- break;
-
- case XenbusStateConnected:
- if (dev->state == XenbusStateConnected)
- break;
- backend_create_netif(be);
- if (be->netif)
- connect(be);
- break;
-
- case XenbusStateClosing:
- if (be->netif)
- kobject_uevent(&dev->dev.kobj, KOBJ_OFFLINE);
- disconnect_backend(dev);
- xenbus_switch_state(dev, XenbusStateClosing);
- break;
-
- case XenbusStateClosed:
- xenbus_switch_state(dev, XenbusStateClosed);
- if (xenbus_dev_is_online(dev))
- break;
- /* fall through if not online */
- case XenbusStateUnknown:
- device_unregister(&dev->dev);
- break;
-
- default:
- xenbus_dev_fatal(dev, -EINVAL, "saw state %d at frontend",
- frontend_state);
- break;
- }
-}
-
-
-static void xen_net_read_rate(struct xenbus_device *dev,
- unsigned long *bytes, unsigned long *usec)
-{
- char *s, *e;
- unsigned long b, u;
- char *ratestr;
-
- /* Default to unlimited bandwidth. */
- *bytes = ~0UL;
- *usec = 0;
-
- ratestr = xenbus_read(XBT_NIL, dev->nodename, "rate", NULL);
- if (IS_ERR(ratestr))
- return;
-
- s = ratestr;
- b = simple_strtoul(s, &e, 10);
- if ((s == e) || (*e != ','))
- goto fail;
-
- s = e + 1;
- u = simple_strtoul(s, &e, 10);
- if ((s == e) || (*e != '\0'))
- goto fail;
-
- *bytes = b;
- *usec = u;
-
- kfree(ratestr);
- return;
-
- fail:
- pr_warn("Failed to parse network rate limit. Traffic unlimited.\n");
- kfree(ratestr);
-}
-
-static int xen_net_read_mac(struct xenbus_device *dev, u8 mac[])
-{
- char *s, *e, *macstr;
- int i;
-
- macstr = s = xenbus_read(XBT_NIL, dev->nodename, "mac", NULL);
- if (IS_ERR(macstr))
- return PTR_ERR(macstr);
-
- for (i = 0; i < ETH_ALEN; i++) {
- mac[i] = simple_strtoul(s, &e, 16);
- if ((s == e) || (*e != ((i == ETH_ALEN-1) ? '\0' : ':'))) {
- kfree(macstr);
- return -ENOENT;
- }
- s = e+1;
- }
-
- kfree(macstr);
- return 0;
-}
-
-static void unregister_hotplug_status_watch(struct backend_info *be)
-{
- if (be->have_hotplug_status_watch) {
- unregister_xenbus_watch(&be->hotplug_status_watch);
- kfree(be->hotplug_status_watch.node);
- }
- be->have_hotplug_status_watch = 0;
-}
-
-static void hotplug_status_changed(struct xenbus_watch *watch,
- const char **vec,
- unsigned int vec_size)
-{
- struct backend_info *be = container_of(watch,
- struct backend_info,
- hotplug_status_watch);
- char *str;
- unsigned int len;
-
- str = xenbus_read(XBT_NIL, be->dev->nodename, "hotplug-status", &len);
- if (IS_ERR(str))
- return;
- if (len == sizeof("connected")-1 && !memcmp(str, "connected", len)) {
- xenbus_switch_state(be->dev, XenbusStateConnected);
- /* Not interested in this watch anymore. */
- unregister_hotplug_status_watch(be);
- }
- kfree(str);
-}
-
-static void connect(struct backend_info *be)
-{
- int err;
- struct xenbus_device *dev = be->dev;
-
- err = connect_rings(be);
- if (err)
- return;
-
- err = xen_net_read_mac(dev, be->netif->fe_dev_addr);
- if (err) {
- xenbus_dev_fatal(dev, err, "parsing %s/mac", dev->nodename);
- return;
- }
-
- xen_net_read_rate(dev, &be->netif->credit_bytes,
- &be->netif->credit_usec);
- be->netif->remaining_credit = be->netif->credit_bytes;
-
- unregister_hotplug_status_watch(be);
- err = xenbus_watch_pathfmt(dev, &be->hotplug_status_watch,
- hotplug_status_changed,
- "%s/%s", dev->nodename, "hotplug-status");
- if (err) {
- /* Switch now, since we can't do a watch. */
- xenbus_switch_state(dev, XenbusStateConnected);
- } else {
- be->have_hotplug_status_watch = 1;
- }
-
- netif_wake_queue(be->netif->dev);
-}
-
-
-static int connect_rings(struct backend_info *be)
-{
- struct xen_netif *netif = be->netif;
- struct xenbus_device *dev = be->dev;
- unsigned long tx_ring_ref, rx_ring_ref;
- unsigned int evtchn, rx_copy;
- int err;
- int val;
-
- err = xenbus_gather(XBT_NIL, dev->otherend,
- "tx-ring-ref", "%lu", &tx_ring_ref,
- "rx-ring-ref", "%lu", &rx_ring_ref,
- "event-channel", "%u", &evtchn, NULL);
- if (err) {
- xenbus_dev_fatal(dev, err,
- "reading %s/ring-ref and event-channel",
- dev->otherend);
- return err;
- }
-
- err = xenbus_scanf(XBT_NIL, dev->otherend, "request-rx-copy", "%u",
- &rx_copy);
- if (err == -ENOENT) {
- err = 0;
- rx_copy = 0;
- }
- if (err < 0) {
- xenbus_dev_fatal(dev, err, "reading %s/request-rx-copy",
- dev->otherend);
- return err;
- }
- if (!rx_copy)
- return -EOPNOTSUPP;
-
- if (netif->dev->tx_queue_len != 0) {
- if (xenbus_scanf(XBT_NIL, dev->otherend,
- "feature-rx-notify", "%d", &val) < 0)
- val = 0;
- if (val)
- netif->can_queue = 1;
- else
- /* Must be non-zero for pfifo_fast to work. */
- netif->dev->tx_queue_len = 1;
- }
-
- if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-sg",
- "%d", &val) < 0)
- val = 0;
- netif->can_sg = !!val;
-
- if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-gso-tcpv4",
- "%d", &val) < 0)
- val = 0;
- netif->gso = !!val;
-
- if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-gso-tcpv4-prefix",
- "%d", &val) < 0)
- val = 0;
- netif->gso_prefix = !!val;
-
- if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-no-csum-offload",
- "%d", &val) < 0)
- val = 0;
- netif->csum = !val;
-
- /* Set dev->features */
- netif_set_features(netif);
-
- /* Map the shared frame, irq etc. */
- err = netif_map(netif, tx_ring_ref, rx_ring_ref, evtchn);
- if (err) {
- xenbus_dev_fatal(dev, err,
- "mapping shared-frames %lu/%lu port %u",
- tx_ring_ref, rx_ring_ref, evtchn);
- return err;
- }
- return 0;
-}
-
-
-/* ** Driver Registration ** */
-
-
-static const struct xenbus_device_id netback_ids[] = {
- { "vif" },
- { "" }
-};
-
-
-static struct xenbus_driver netback = {
- .name = "vif",
- .owner = THIS_MODULE,
- .ids = netback_ids,
- .probe = netback_probe,
- .remove = netback_remove,
- .uevent = netback_uevent,
- .otherend_changed = frontend_changed,
-};
-
-
-int netif_xenbus_init(void)
-{
- printk(KERN_CRIT "registering netback\n");
- return xenbus_register_backend(&netback);
-}
--
1.7.4
From 2cd806db456cf60a886a55547a840df9e4e39d0f Mon Sep 17 00:00:00 2001
From: Ian Campbell <ian.campbell@citrix.com>
Date: Thu, 20 Jan 2011 13:02:14 +0000
Subject: [PATCH 158/197] xen: netback: remove queue_length module option
This setting can be controlled via sysfs.
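For example (the vif interface name below is purely illustrative), the same
effect is available at runtime with either of:
  echo 32 > /sys/class/net/vif1.0/tx_queue_len
  ip link set dev vif1.0 txqueuelen 32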
Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
---
drivers/net/xen-netback/interface.c | 11 ++---------
1 files changed, 2 insertions(+), 9 deletions(-)
diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c
index b429f8c..249f010 100644
--- a/drivers/net/xen-netback/interface.c
+++ b/drivers/net/xen-netback/interface.c
@@ -36,14 +36,7 @@
#include <xen/events.h>
#include <asm/xen/hypercall.h>
-/*
- * Module parameter 'queue_length':
- *
- * Enables queuing in the network stack when a client has run out of receive
- * descriptors.
- */
-static unsigned long netbk_queue_length = 32;
-module_param_named(queue_length, netbk_queue_length, ulong, 0644);
+#define NETBK_QUEUE_LENGTH 32
static void netbk_add_netif(struct xen_netbk *netbk, int group_nr,
struct xen_netif *netif)
@@ -296,7 +289,7 @@ struct xen_netif *netif_alloc(struct device *parent, domid_t domid,
netif_set_features(netif);
SET_ETHTOOL_OPS(dev, &network_ethtool_ops);
- dev->tx_queue_len = netbk_queue_length;
+ dev->tx_queue_len = NETBK_QUEUE_LENGTH;
/*
* Initialise a dummy MAC address. We choose the numerically
--
1.7.4
From 864698db8c54a8cba55e4976a081a58a5f15726e Mon Sep 17 00:00:00 2001
From: Ian Campbell <ian.campbell@citrix.com>
Date: Thu, 20 Jan 2011 14:16:52 +0000
Subject: [PATCH 159/197] xen: netback: correct error return from ethtool hooks.
Should be -EOPNOTSUPP ("operation not supported"), not -ENOSYS, which strictly means an unimplemented system call.
Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
---
drivers/net/xen-netback/interface.c | 6 +++---
1 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c
index 249f010..c41d93e 100644
--- a/drivers/net/xen-netback/interface.c
+++ b/drivers/net/xen-netback/interface.c
@@ -130,7 +130,7 @@ static int netbk_set_tx_csum(struct net_device *dev, u32 data)
struct xen_netif *netif = netdev_priv(dev);
if (data) {
if (!netif->csum)
- return -ENOSYS;
+ return -EOPNOTSUPP;
netif->features_disabled &= ~NETIF_F_IP_CSUM;
} else {
netif->features_disabled |= NETIF_F_IP_CSUM;
@@ -145,7 +145,7 @@ static int netbk_set_sg(struct net_device *dev, u32 data)
struct xen_netif *netif = netdev_priv(dev);
if (data) {
if (!netif->can_sg)
- return -ENOSYS;
+ return -EOPNOTSUPP;
netif->features_disabled &= ~NETIF_F_SG;
} else {
netif->features_disabled |= NETIF_F_SG;
@@ -160,7 +160,7 @@ static int netbk_set_tso(struct net_device *dev, u32 data)
struct xen_netif *netif = netdev_priv(dev);
if (data) {
if (!netif->gso && !netif->gso_prefix)
- return -ENOSYS;
+ return -EOPNOTSUPP;
netif->features_disabled &= ~NETIF_F_TSO;
} else {
netif->features_disabled |= NETIF_F_TSO;
--
1.7.4
From 0d5c899d818055dbf35380315401d646e3f76bad Mon Sep 17 00:00:00 2001
From: Ian Campbell <ian.campbell@citrix.com>
Date: Thu, 20 Jan 2011 14:20:04 +0000
Subject: [PATCH 160/197] xen: netback: avoid leading _ in function parameter names.
A leading underscore is conventionally reserved for distinguishing lower-level
helper functions or for avoiding naming conflicts in macros, so it should not
be used for ordinary parameter names.
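A minimal sketch of the macro case the convention protects (the macro below is
hypothetical, written only against fields that already exist in this driver):

	/*
	 * Statement-expression macro: the underscored temporary evaluates
	 * 'idx' exactly once and cannot collide with a caller variable that
	 * also happens to be named idx when the body is expanded.
	 */
	#define netbk_page_matches(netbk, idx, pg)			\
		({ unsigned int _idx = (idx);				\
		   (netbk)->mmap_pages[_idx] == (pg); })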
Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
---
drivers/net/xen-netback/netback.c | 6 +++---
1 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c
index b290525..cd2af15 100644
--- a/drivers/net/xen-netback/netback.c
+++ b/drivers/net/xen-netback/netback.c
@@ -90,7 +90,7 @@ static inline void netif_set_page_ext(struct page *pg,
}
static int netif_get_page_ext(struct page *pg,
- unsigned int *_group, unsigned int *_idx)
+ unsigned int *pgroup, unsigned int *pidx)
{
union page_ext ext = { .mapping = pg->mapping };
struct xen_netbk *netbk;
@@ -117,8 +117,8 @@ static int netif_get_page_ext(struct page *pg,
if (netbk->mmap_pages[idx] != pg)
return 0;
- *_group = group;
- *_idx = idx;
+ *pgroup = group;
+ *pidx = idx;
return 1;
}
--
1.7.4
From e46f8bf929da99644cd6d672e33541943d800e79 Mon Sep 17 00:00:00 2001
From: Ian Campbell <ian.campbell@citrix.com>
Date: Thu, 20 Jan 2011 14:21:24 +0000
Subject: [PATCH 161/197] xen: netback: drop unused debug interrupt handler.
Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
---
drivers/net/xen-netback/netback.c | 57 -------------------------------------
1 files changed, 0 insertions(+), 57 deletions(-)
diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c
index cd2af15..a7646f3 100644
--- a/drivers/net/xen-netback/netback.c
+++ b/drivers/net/xen-netback/netback.c
@@ -47,8 +47,6 @@
#include <asm/xen/hypercall.h>
#include <asm/xen/page.h>
-/*define NETBE_DEBUG_INTERRUPT*/
-
struct xen_netbk *xen_netbk;
int xen_netbk_group_nr;
@@ -1707,52 +1705,6 @@ static struct xen_netif_rx_response *make_rx_response(struct xen_netif *netif,
return resp;
}
-#ifdef NETBE_DEBUG_INTERRUPT
-static irqreturn_t netif_be_dbg(int irq, void *dev_id, struct pt_regs *regs)
-{
- struct list_head *ent;
- struct xen_netif *netif;
- int i = 0;
- int group = 0;
-
- printk(KERN_ALERT "netif_schedule_list:\n");
-
- for (group = 0; group < xen_netbk_group_nr; group++) {
- struct xen_netbk *netbk = &xen_netbk[group];
- spin_lock_irq(&netbk->net_schedule_list_lock);
- printk(KERN_ALERT "xen_netback group number: %d\n", group);
- list_for_each(ent, &netbk->net_schedule_list) {
- netif = list_entry(ent, struct xen_netif, list);
- printk(KERN_ALERT " %d: private(rx_req_cons=%08x "
- "rx_resp_prod=%08x\n",
- i, netif->rx.req_cons, netif->rx.rsp_prod_pvt);
- printk(KERN_ALERT
- " tx_req_cons=%08x, tx_resp_prod=%08x)\n",
- netif->tx.req_cons, netif->tx.rsp_prod_pvt);
- printk(KERN_ALERT
- " shared(rx_req_prod=%08x "
- "rx_resp_prod=%08x\n",
- netif->rx.sring->req_prod,
- netif->rx.sring->rsp_prod);
- printk(KERN_ALERT
- " rx_event=%08x, tx_req_prod=%08x\n",
- netif->rx.sring->rsp_event,
- netif->tx.sring->req_prod);
- printk(KERN_ALERT
- " tx_resp_prod=%08x, tx_event=%08x)\n",
- netif->tx.sring->rsp_prod,
- netif->tx.sring->rsp_event);
- i++;
- }
- spin_unlock_irq(&netbk->net_schedule_list_lock);
- }
-
- printk(KERN_ALERT " ** End of netif_schedule_list **\n");
-
- return IRQ_HANDLED;
-}
-#endif
-
static inline int rx_work_todo(struct xen_netbk *netbk)
{
return !skb_queue_empty(&netbk->rx_queue);
@@ -1903,15 +1855,6 @@ static int __init netback_init(void)
if (rc)
goto failed_init;
-#ifdef NETBE_DEBUG_INTERRUPT
- (void)bind_virq_to_irqhandler(VIRQ_DEBUG,
- 0,
- netif_be_dbg,
- IRQF_SHARED,
- "net-be-dbg",
- &netif_be_dbg);
-#endif
-
return 0;
failed_init:
--
1.7.4
From 58941407abf6a701bc59d903deec0ac141bccff3 Mon Sep 17 00:00:00 2001
From: Ian Campbell <ian.campbell@citrix.com>
Date: Thu, 20 Jan 2011 16:42:27 +0000
Subject: [PATCH 162/197] xen: netif: properly namespace the Xen netif protocol header.
Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
---
drivers/net/xen-netback/netback.c | 38 +++++++++---------
drivers/net/xen-netfront.c | 20 +++++-----
include/xen/interface/io/netif.h | 80 ++++++++++++++++++------------------
3 files changed, 69 insertions(+), 69 deletions(-)
diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c
index a7646f3..69b4535 100644
--- a/drivers/net/xen-netback/netback.c
+++ b/drivers/net/xen-netback/netback.c
@@ -582,7 +582,7 @@ static int netbk_check_gop(int nr_meta_slots, domid_t domid,
struct netrx_pending_operations *npo)
{
struct gnttab_copy *copy_op;
- int status = NETIF_RSP_OKAY;
+ int status = XEN_NETIF_RSP_OKAY;
int i;
for (i = 0; i < nr_meta_slots; i++) {
@@ -590,7 +590,7 @@ static int netbk_check_gop(int nr_meta_slots, domid_t domid,
if (copy_op->status != GNTST_okay) {
pr_debug("Bad status %d from copy to DOM%d.\n",
copy_op->status, domid);
- status = NETIF_RSP_ERROR;
+ status = XEN_NETIF_RSP_ERROR;
}
}
@@ -615,7 +615,7 @@ static void netbk_add_frag_responses(struct xen_netif *netif, int status,
if (i == nr_meta_slots - 1)
flags = 0;
else
- flags = NETRXF_more_data;
+ flags = XEN_NETRXF_more_data;
offset = 0;
make_rx_response(netif, meta[i].id, status, offset,
@@ -687,7 +687,7 @@ static void net_rx_action(unsigned long data)
resp = RING_GET_RESPONSE(&netif->rx,
netif->rx.rsp_prod_pvt++);
- resp->flags = NETRXF_gso_prefix | NETRXF_more_data;
+ resp->flags = XEN_NETRXF_gso_prefix | XEN_NETRXF_more_data;
resp->offset = netbk->meta[npo.meta_cons].gso_size;
resp->id = netbk->meta[npo.meta_cons].id;
@@ -707,13 +707,13 @@ static void net_rx_action(unsigned long data)
if (sco->meta_slots_used == 1)
flags = 0;
else
- flags = NETRXF_more_data;
+ flags = XEN_NETRXF_more_data;
if (skb->ip_summed == CHECKSUM_PARTIAL) /* local packet? */
- flags |= NETRXF_csum_blank | NETRXF_data_validated;
+ flags |= XEN_NETRXF_csum_blank | XEN_NETRXF_data_validated;
else if (skb->ip_summed == CHECKSUM_UNNECESSARY)
/* remote but checksummed. */
- flags |= NETRXF_data_validated;
+ flags |= XEN_NETRXF_data_validated;
offset = 0;
resp = make_rx_response(netif, netbk->meta[npo.meta_cons].id,
@@ -727,7 +727,7 @@ static void net_rx_action(unsigned long data)
RING_GET_RESPONSE(&netif->rx,
netif->rx.rsp_prod_pvt++);
- resp->flags |= NETRXF_extra_info;
+ resp->flags |= XEN_NETRXF_extra_info;
gso->u.gso.size = netbk->meta[npo.meta_cons].gso_size;
gso->u.gso.type = XEN_NETIF_GSO_TYPE_TCPV4;
@@ -989,7 +989,7 @@ static inline void net_tx_action_dealloc(struct xen_netbk *netbk)
netif = pending_tx_info[pending_idx].netif;
make_tx_response(netif, &pending_tx_info[pending_idx].req,
- NETIF_RSP_OKAY);
+ XEN_NETIF_RSP_OKAY);
/* Ready for next use. */
gnttab_reset_grant_page(netbk->mmap_pages[pending_idx]);
@@ -1009,7 +1009,7 @@ static void netbk_tx_err(struct xen_netif *netif,
RING_IDX cons = netif->tx.req_cons;
do {
- make_tx_response(netif, txp, NETIF_RSP_ERROR);
+ make_tx_response(netif, txp, XEN_NETIF_RSP_ERROR);
if (cons >= end)
break;
txp = RING_GET_REQUEST(&netif->tx, cons++);
@@ -1027,7 +1027,7 @@ static int netbk_count_requests(struct xen_netif *netif,
RING_IDX cons = netif->tx.req_cons;
int frags = 0;
- if (!(first->flags & NETTXF_more_data))
+ if (!(first->flags & XEN_NETTXF_more_data))
return 0;
do {
@@ -1056,7 +1056,7 @@ static int netbk_count_requests(struct xen_netif *netif,
txp->offset, txp->size);
return -frags;
}
- } while ((txp++)->flags & NETTXF_more_data);
+ } while ((txp++)->flags & XEN_NETTXF_more_data);
return frags;
}
@@ -1115,7 +1115,7 @@ static int netbk_tx_check_mop(struct xen_netbk *netbk,
pending_ring_idx_t index;
index = pending_index(netbk->pending_prod++);
txp = &pending_tx_info[pending_idx].req;
- make_tx_response(netif, txp, NETIF_RSP_ERROR);
+ make_tx_response(netif, txp, XEN_NETIF_RSP_ERROR);
netbk->pending_ring[index] = pending_idx;
netif_put(netif);
} else {
@@ -1151,7 +1151,7 @@ static int netbk_tx_check_mop(struct xen_netbk *netbk,
/* Error on this fragment: respond to client with an error. */
txp = &netbk->pending_tx_info[pending_idx].req;
- make_tx_response(netif, txp, NETIF_RSP_ERROR);
+ make_tx_response(netif, txp, XEN_NETIF_RSP_ERROR);
index = pending_index(netbk->pending_prod++);
netbk->pending_ring[index] = pending_idx;
netif_put(netif);
@@ -1406,7 +1406,7 @@ static unsigned net_tx_build_mops(struct xen_netbk *netbk)
netif->tx.req_cons = ++idx;
memset(extras, 0, sizeof(extras));
- if (txreq.flags & NETTXF_extra_info) {
+ if (txreq.flags & XEN_NETTXF_extra_info) {
work_to_do = netbk_get_extras(netif, extras,
work_to_do);
idx = netif->tx.req_cons;
@@ -1542,9 +1542,9 @@ static void net_tx_submit(struct xen_netbk *netbk)
netif_idx_release(netbk, pending_idx);
}
- if (txp->flags & NETTXF_csum_blank)
+ if (txp->flags & XEN_NETTXF_csum_blank)
skb->ip_summed = CHECKSUM_PARTIAL;
- else if (txp->flags & NETTXF_data_validated)
+ else if (txp->flags & XEN_NETTXF_data_validated)
skb->ip_summed = CHECKSUM_UNNECESSARY;
netbk_fill_frags(netbk, skb);
@@ -1673,8 +1673,8 @@ static void make_tx_response(struct xen_netif *netif,
resp->id = txp->id;
resp->status = st;
- if (txp->flags & NETTXF_extra_info)
- RING_GET_RESPONSE(&netif->tx, ++i)->status = NETIF_RSP_NULL;
+ if (txp->flags & XEN_NETTXF_extra_info)
+ RING_GET_RESPONSE(&netif->tx, ++i)->status = XEN_NETIF_RSP_NULL;
netif->tx.rsp_prod_pvt = ++i;
RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&netif->tx, notify);
diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c
index 458bb57..cc23d42 100644
--- a/drivers/net/xen-netfront.c
+++ b/drivers/net/xen-netfront.c
@@ -356,7 +356,7 @@ static void xennet_tx_buf_gc(struct net_device *dev)
struct xen_netif_tx_response *txrsp;
txrsp = RING_GET_RESPONSE(&np->tx, cons);
- if (txrsp->status == NETIF_RSP_NULL)
+ if (txrsp->status == XEN_NETIF_RSP_NULL)
continue;
id = txrsp->id;
@@ -413,7 +413,7 @@ static void xennet_make_frags(struct sk_buff *skb, struct net_device *dev,
larger than a page), split it it into page-sized chunks. */
while (len > PAGE_SIZE - offset) {
tx->size = PAGE_SIZE - offset;
- tx->flags |= NETTXF_more_data;
+ tx->flags |= XEN_NETTXF_more_data;
len -= tx->size;
data += tx->size;
offset = 0;
@@ -439,7 +439,7 @@ static void xennet_make_frags(struct sk_buff *skb, struct net_device *dev,
for (i = 0; i < frags; i++) {
skb_frag_t *frag = skb_shinfo(skb)->frags + i;
- tx->flags |= NETTXF_more_data;
+ tx->flags |= XEN_NETTXF_more_data;
id = get_id_from_freelist(&np->tx_skb_freelist, np->tx_skbs);
np->tx_skbs[id].skb = skb_get(skb);
@@ -514,10 +514,10 @@ static int xennet_start_xmit(struct sk_buff *skb, struct net_device *dev)
tx->flags = 0;
if (skb->ip_summed == CHECKSUM_PARTIAL)
/* local packet? */
- tx->flags |= NETTXF_csum_blank | NETTXF_data_validated;
+ tx->flags |= XEN_NETTXF_csum_blank | XEN_NETTXF_data_validated;
else if (skb->ip_summed == CHECKSUM_UNNECESSARY)
/* remote but checksummed. */
- tx->flags |= NETTXF_data_validated;
+ tx->flags |= XEN_NETTXF_data_validated;
if (skb_shinfo(skb)->gso_size) {
struct xen_netif_extra_info *gso;
@@ -528,7 +528,7 @@ static int xennet_start_xmit(struct sk_buff *skb, struct net_device *dev)
if (extra)
extra->flags |= XEN_NETIF_EXTRA_FLAG_MORE;
else
- tx->flags |= NETTXF_extra_info;
+ tx->flags |= XEN_NETTXF_extra_info;
gso->u.gso.size = skb_shinfo(skb)->gso_size;
gso->u.gso.type = XEN_NETIF_GSO_TYPE_TCPV4;
@@ -648,7 +648,7 @@ static int xennet_get_responses(struct netfront_info *np,
int err = 0;
unsigned long ret;
- if (rx->flags & NETRXF_extra_info) {
+ if (rx->flags & XEN_NETRXF_extra_info) {
err = xennet_get_extras(np, extras, rp);
cons = np->rx.rsp_cons;
}
@@ -685,7 +685,7 @@ static int xennet_get_responses(struct netfront_info *np,
__skb_queue_tail(list, skb);
next:
- if (!(rx->flags & NETRXF_more_data))
+ if (!(rx->flags & XEN_NETRXF_more_data))
break;
if (cons + frags == rp) {
@@ -950,9 +950,9 @@ err:
skb->truesize += skb->data_len - (RX_COPY_THRESHOLD - len);
skb->len += skb->data_len;
- if (rx->flags & NETRXF_csum_blank)
+ if (rx->flags & XEN_NETRXF_csum_blank)
skb->ip_summed = CHECKSUM_PARTIAL;
- else if (rx->flags & NETRXF_data_validated)
+ else if (rx->flags & XEN_NETRXF_data_validated)
skb->ip_summed = CHECKSUM_UNNECESSARY;
__skb_queue_tail(&rxq, skb);
diff --git a/include/xen/interface/io/netif.h b/include/xen/interface/io/netif.h
index 8309344..cb94668 100644
--- a/include/xen/interface/io/netif.h
+++ b/include/xen/interface/io/netif.h
@@ -22,50 +22,50 @@
/*
* This is the 'wire' format for packets:
- * Request 1: netif_tx_request -- NETTXF_* (any flags)
- * [Request 2: netif_tx_extra] (only if request 1 has NETTXF_extra_info)
- * [Request 3: netif_tx_extra] (only if request 2 has XEN_NETIF_EXTRA_MORE)
- * Request 4: netif_tx_request -- NETTXF_more_data
- * Request 5: netif_tx_request -- NETTXF_more_data
+ * Request 1: xen_netif_tx_request -- XEN_NETTXF_* (any flags)
+ * [Request 2: xen_netif_extra_info] (only if request 1 has XEN_NETTXF_extra_info)
+ * [Request 3: xen_netif_extra_info] (only if request 2 has XEN_NETIF_EXTRA_MORE)
+ * Request 4: xen_netif_tx_request -- XEN_NETTXF_more_data
+ * Request 5: xen_netif_tx_request -- XEN_NETTXF_more_data
* ...
- * Request N: netif_tx_request -- 0
+ * Request N: xen_netif_tx_request -- 0
*/
/* Protocol checksum field is blank in the packet (hardware offload)? */
-#define _NETTXF_csum_blank (0)
-#define NETTXF_csum_blank (1U<<_NETTXF_csum_blank)
+#define _XEN_NETTXF_csum_blank (0)
+#define XEN_NETTXF_csum_blank (1U<<_XEN_NETTXF_csum_blank)
/* Packet data has been validated against protocol checksum. */
-#define _NETTXF_data_validated (1)
-#define NETTXF_data_validated (1U<<_NETTXF_data_validated)
+#define _XEN_NETTXF_data_validated (1)
+#define XEN_NETTXF_data_validated (1U<<_XEN_NETTXF_data_validated)
/* Packet continues in the next request descriptor. */
-#define _NETTXF_more_data (2)
-#define NETTXF_more_data (1U<<_NETTXF_more_data)
+#define _XEN_NETTXF_more_data (2)
+#define XEN_NETTXF_more_data (1U<<_XEN_NETTXF_more_data)
/* Packet to be followed by extra descriptor(s). */
-#define _NETTXF_extra_info (3)
-#define NETTXF_extra_info (1U<<_NETTXF_extra_info)
+#define _XEN_NETTXF_extra_info (3)
+#define XEN_NETTXF_extra_info (1U<<_XEN_NETTXF_extra_info)
struct xen_netif_tx_request {
grant_ref_t gref; /* Reference to buffer page */
uint16_t offset; /* Offset within buffer page */
- uint16_t flags; /* NETTXF_* */
+ uint16_t flags; /* XEN_NETTXF_* */
uint16_t id; /* Echoed in response message. */
uint16_t size; /* Packet size in bytes. */
};
-/* Types of netif_extra_info descriptors. */
-#define XEN_NETIF_EXTRA_TYPE_NONE (0) /* Never used - invalid */
-#define XEN_NETIF_EXTRA_TYPE_GSO (1) /* u.gso */
-#define XEN_NETIF_EXTRA_TYPE_MAX (2)
+/* Types of xen_netif_extra_info descriptors. */
+#define XEN_NETIF_EXTRA_TYPE_NONE (0) /* Never used - invalid */
+#define XEN_NETIF_EXTRA_TYPE_GSO (1) /* u.gso */
+#define XEN_NETIF_EXTRA_TYPE_MAX (2)
-/* netif_extra_info flags. */
-#define _XEN_NETIF_EXTRA_FLAG_MORE (0)
-#define XEN_NETIF_EXTRA_FLAG_MORE (1U<<_XEN_NETIF_EXTRA_FLAG_MORE)
+/* xen_netif_extra_info flags. */
+#define _XEN_NETIF_EXTRA_FLAG_MORE (0)
+#define XEN_NETIF_EXTRA_FLAG_MORE (1U<<_XEN_NETIF_EXTRA_FLAG_MORE)
/* GSO types - only TCPv4 currently supported. */
-#define XEN_NETIF_GSO_TYPE_TCPV4 (1)
+#define XEN_NETIF_GSO_TYPE_TCPV4 (1)
/*
* This structure needs to fit within both netif_tx_request and
@@ -107,7 +107,7 @@ struct xen_netif_extra_info {
struct xen_netif_tx_response {
uint16_t id;
- int16_t status; /* NETIF_RSP_* */
+ int16_t status; /* XEN_NETIF_RSP_* */
};
struct xen_netif_rx_request {
@@ -116,29 +116,29 @@ struct xen_netif_rx_request {
};
/* Packet data has been validated against protocol checksum. */
-#define _NETRXF_data_validated (0)
-#define NETRXF_data_validated (1U<<_NETRXF_data_validated)
+#define _XEN_NETRXF_data_validated (0)
+#define XEN_NETRXF_data_validated (1U<<_XEN_NETRXF_data_validated)
/* Protocol checksum field is blank in the packet (hardware offload)? */
-#define _NETRXF_csum_blank (1)
-#define NETRXF_csum_blank (1U<<_NETRXF_csum_blank)
+#define _XEN_NETRXF_csum_blank (1)
+#define XEN_NETRXF_csum_blank (1U<<_XEN_NETRXF_csum_blank)
/* Packet continues in the next request descriptor. */
-#define _NETRXF_more_data (2)
-#define NETRXF_more_data (1U<<_NETRXF_more_data)
+#define _XEN_NETRXF_more_data (2)
+#define XEN_NETRXF_more_data (1U<<_XEN_NETRXF_more_data)
/* Packet to be followed by extra descriptor(s). */
-#define _NETRXF_extra_info (3)
-#define NETRXF_extra_info (1U<<_NETRXF_extra_info)
+#define _XEN_NETRXF_extra_info (3)
+#define XEN_NETRXF_extra_info (1U<<_XEN_NETRXF_extra_info)
/* GSO Prefix descriptor. */
-#define _NETRXF_gso_prefix (4)
-#define NETRXF_gso_prefix (1U<<_NETRXF_gso_prefix)
+#define _XEN_NETRXF_gso_prefix (4)
+#define XEN_NETRXF_gso_prefix (1U<<_XEN_NETRXF_gso_prefix)
struct xen_netif_rx_response {
uint16_t id;
uint16_t offset; /* Offset in page of start of received packet */
- uint16_t flags; /* NETRXF_* */
+ uint16_t flags; /* XEN_NETRXF_* */
int16_t status; /* -ve: BLKIF_RSP_* ; +ve: Rx'ed pkt size. */
};
@@ -153,10 +153,10 @@ DEFINE_RING_TYPES(xen_netif_rx,
struct xen_netif_rx_request,
struct xen_netif_rx_response);
-#define NETIF_RSP_DROPPED -2
-#define NETIF_RSP_ERROR -1
-#define NETIF_RSP_OKAY 0
-/* No response: used for auxiliary requests (e.g., netif_tx_extra). */
-#define NETIF_RSP_NULL 1
+#define XEN_NETIF_RSP_DROPPED -2
+#define XEN_NETIF_RSP_ERROR -1
+#define XEN_NETIF_RSP_OKAY 0
+/* No response: used for auxiliary requests (e.g., xen_netif_extra_info). */
+#define XEN_NETIF_RSP_NULL 1
#endif
--
1.7.4
From e71b6269484033967d489f0ebb1333d840d18fe0 Mon Sep 17 00:00:00 2001
From: Ian Campbell <ian.campbell@citrix.com>
Date: Tue, 1 Feb 2011 16:50:24 +0000
Subject: [PATCH 163/197] xen: netif: improve Kconfig help text for front- and backend drivers.
Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
---
drivers/net/Kconfig | 35 +++++++++++++++++++++++++++--------
1 files changed, 27 insertions(+), 8 deletions(-)
diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig
index 5b088f5..1c77e183 100644
--- a/drivers/net/Kconfig
+++ b/drivers/net/Kconfig
@@ -2963,19 +2963,38 @@ config XEN_NETDEV_FRONTEND
select XEN_XENBUS_FRONTEND
default y
help
- The network device frontend driver allows the kernel to
- access network devices exported exported by a virtual
- machine containing a physical network device driver. The
- frontend driver is intended for unprivileged guest domains;
- if you are compiling a kernel for a Xen guest, you almost
- certainly want to enable this.
+ This driver provides support for Xen paravirtual network
+ devices exported by a Xen network driver domain (often
+ domain 0).
+
+ The corresponding Linux backend driver is enabled by the
+ CONFIG_XEN_NETDEV_BACKEND option.
+
+ If you are compiling a kernel for use as a Xen guest, you
+ should say Y here. To compile this driver as a module, choose
+ M here: the module will be called xen-netfront.
config XEN_NETDEV_BACKEND
tristate "Xen backend network device"
depends on XEN_BACKEND
help
- Implement the network backend driver, which passes packets
- from the guest domain's frontend drivers to the network.
+ This driver allows the kernel to act as a Xen network driver
+ domain which exports paravirtual network devices to other
+ Xen domains. These devices can be accessed by any operating
+ system that implements a compatible front end.
+
+ The corresponding Linux frontend driver is enabled by the
+ CONFIG_XEN_NETDEV_FRONTEND configuration option.
+
+ The backend driver presents a standard network device
+ endpoint for each paravirtual network device to the driver
+ domain network stack. These can then be bridged, routed,
+ etc. in order to provide full network connectivity.
+
+ If you are compiling a kernel to run in a Xen network driver
+ domain (often this is domain 0) you should say Y here. To
+ compile this driver as a module, choose M here: the module
+ will be called xen-netback.
config ISERIES_VETH
tristate "iSeries Virtual Ethernet driver support"
--
1.7.4
From 02023871e59024643ee46dc30f920532acf1d04f Mon Sep 17 00:00:00 2001
From: Ian Campbell <ian.campbell@citrix.com>
Date: Wed, 19 Jan 2011 14:41:55 +0000
Subject: [PATCH 164/197] xen: netback: drop ethtool drvinfo callback
The default provided by the network core is sufficient for our needs.
Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
---
drivers/net/xen-netback/interface.c | 9 ---------
1 files changed, 0 insertions(+), 9 deletions(-)
diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c
index c41d93e..28b0832 100644
--- a/drivers/net/xen-netback/interface.c
+++ b/drivers/net/xen-netback/interface.c
@@ -170,13 +170,6 @@ static int netbk_set_tso(struct net_device *dev, u32 data)
return 0;
}
-static void netbk_get_drvinfo(struct net_device *dev,
- struct ethtool_drvinfo *info)
-{
- strcpy(info->driver, "netbk");
- strcpy(info->bus_info, dev_name(dev->dev.parent));
-}
-
static const struct netif_stat {
char name[ETH_GSTRING_LEN];
u16 offset;
@@ -225,8 +218,6 @@ static void netbk_get_strings(struct net_device *dev, u32 stringset, u8 * data)
}
static struct ethtool_ops network_ethtool_ops = {
- .get_drvinfo = netbk_get_drvinfo,
-
.get_tx_csum = ethtool_op_get_tx_csum,
.set_tx_csum = netbk_set_tx_csum,
.get_sg = ethtool_op_get_sg,
--
1.7.4
From 138c97fc070eb65b7a0644fb6b6715350150702f Mon Sep 17 00:00:00 2001
From: Ian Campbell <ian.campbell@citrix.com>
Date: Thu, 20 Jan 2011 16:24:28 +0000
Subject: [PATCH 165/197] xen: netback: use xen_netbk prefix where appropriate
Do not use net_ or netif_ since these prefixes belong to the network core.
Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
---
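[Editor's note, illustrative summary only: the renames applied by this patch, all taken directly from the hunks below.]

    net_tx_action()        -> xen_netbk_tx_action()
    net_rx_action()        -> xen_netbk_rx_action()
    net_tx_build_mops()    -> xen_netbk_tx_build_mops()
    net_tx_submit()        -> xen_netbk_tx_submit()
    netif_idx_release()    -> xen_netbk_idx_release()
    netbk_get_requests()   -> xen_netbk_get_requests()
    netbk_tx_check_mop()   -> xen_netbk_tx_check_mop()
    netbk_fill_frags()     -> xen_netbk_fill_frags()
    netbk_action_thread()  -> xen_netbk_action_thread()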
drivers/net/xen-netback/netback.c | 84 ++++++++++++++++++------------------
1 files changed, 42 insertions(+), 42 deletions(-)
diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c
index 69b4535..6ed4459 100644
--- a/drivers/net/xen-netback/netback.c
+++ b/drivers/net/xen-netback/netback.c
@@ -50,7 +50,7 @@
struct xen_netbk *xen_netbk;
int xen_netbk_group_nr;
-static void netif_idx_release(struct xen_netbk *netbk, u16 pending_idx);
+static void xen_netbk_idx_release(struct xen_netbk *netbk, u16 pending_idx);
static void make_tx_response(struct xen_netif *netif,
struct xen_netif_tx_request *txp,
s8 st);
@@ -61,9 +61,9 @@ static struct xen_netif_rx_response *make_rx_response(struct xen_netif *netif,
u16 size,
u16 flags);
-static void net_tx_action(unsigned long data);
+static void xen_netbk_tx_action(unsigned long data);
-static void net_rx_action(unsigned long data);
+static void xen_netbk_rx_action(unsigned long data);
static inline unsigned long idx_to_pfn(struct xen_netbk *netbk,
unsigned int idx)
@@ -78,8 +78,8 @@ static inline unsigned long idx_to_kaddr(struct xen_netbk *netbk,
}
/* extra field used in struct page */
-static inline void netif_set_page_ext(struct page *pg,
- unsigned int group, unsigned int idx)
+static inline void set_page_ext(struct page *pg,
+ unsigned int group, unsigned int idx)
{
union page_ext ext = { .e = { .group = group + 1, .idx = idx } };
@@ -87,8 +87,8 @@ static inline void netif_set_page_ext(struct page *pg,
pg->mapping = ext.mapping;
}
-static int netif_get_page_ext(struct page *pg,
- unsigned int *pgroup, unsigned int *pidx)
+static int get_page_ext(struct page *pg,
+ unsigned int *pgroup, unsigned int *pidx)
{
union page_ext ext = { .mapping = pg->mapping };
struct xen_netbk *netbk;
@@ -445,11 +445,11 @@ static void netbk_gop_frag_copy(struct xen_netif *netif,
struct gnttab_copy *copy_gop;
struct netbk_rx_meta *meta;
/*
- * These variables a used iff netif_get_page_ext returns true,
+ * These variables are used iff get_page_ext returns true,
* in which case they are guaranteed to be initialized.
*/
unsigned int uninitialized_var(group), uninitialized_var(idx);
- int foreign = netif_get_page_ext(page, &group, &idx);
+ int foreign = get_page_ext(page, &group, &idx);
unsigned long bytes;
/* Data must not cross a page boundary. */
@@ -627,7 +627,7 @@ struct skb_cb_overlay {
int meta_slots_used;
};
-static void net_rx_action(unsigned long data)
+static void xen_netbk_rx_action(unsigned long data)
{
struct xen_netif *netif = NULL;
struct xen_netbk *netbk = (struct xen_netbk *)data;
@@ -915,7 +915,7 @@ static inline void net_tx_action_dealloc(struct xen_netbk *netbk)
do {
dp = netbk->dealloc_prod;
- /* Ensure we see all indices enqueued by netif_idx_release(). */
+ /* Ensure we see all indices enqueued by xen_netbk_idx_release(). */
smp_rmb();
while (dc != dp) {
@@ -1061,11 +1061,11 @@ static int netbk_count_requests(struct xen_netif *netif,
return frags;
}
-static struct gnttab_map_grant_ref *netbk_get_requests(struct xen_netbk *netbk,
- struct xen_netif *netif,
- struct sk_buff *skb,
- struct xen_netif_tx_request *txp,
- struct gnttab_map_grant_ref *mop)
+static struct gnttab_map_grant_ref *xen_netbk_get_requests(struct xen_netbk *netbk,
+ struct xen_netif *netif,
+ struct sk_buff *skb,
+ struct xen_netif_tx_request *txp,
+ struct gnttab_map_grant_ref *mop)
{
struct skb_shared_info *shinfo = skb_shinfo(skb);
skb_frag_t *frags = shinfo->frags;
@@ -1096,9 +1096,9 @@ static struct gnttab_map_grant_ref *netbk_get_requests(struct xen_netbk *netbk,
return mop;
}
-static int netbk_tx_check_mop(struct xen_netbk *netbk,
- struct sk_buff *skb,
- struct gnttab_map_grant_ref **mopp)
+static int xen_netbk_tx_check_mop(struct xen_netbk *netbk,
+ struct sk_buff *skb,
+ struct gnttab_map_grant_ref **mopp)
{
struct gnttab_map_grant_ref *mop = *mopp;
int pending_idx = *((u16 *)skb->data);
@@ -1145,7 +1145,7 @@ static int netbk_tx_check_mop(struct xen_netbk *netbk,
netbk->grant_tx_handle[pending_idx] = mop->handle;
/* Had a previous error? Invalidate this fragment. */
if (unlikely(err))
- netif_idx_release(netbk, pending_idx);
+ xen_netbk_idx_release(netbk, pending_idx);
continue;
}
@@ -1162,10 +1162,10 @@ static int netbk_tx_check_mop(struct xen_netbk *netbk,
/* First error: invalidate header and preceding fragments. */
pending_idx = *((u16 *)skb->data);
- netif_idx_release(netbk, pending_idx);
+ xen_netbk_idx_release(netbk, pending_idx);
for (j = start; j < i; j++) {
pending_idx = (unsigned long)shinfo->frags[i].page;
- netif_idx_release(netbk, pending_idx);
+ xen_netbk_idx_release(netbk, pending_idx);
}
/* Remember the error: invalidate all subsequent fragments. */
@@ -1176,7 +1176,7 @@ static int netbk_tx_check_mop(struct xen_netbk *netbk,
return err;
}
-static void netbk_fill_frags(struct xen_netbk *netbk, struct sk_buff *skb)
+static void xen_netbk_fill_frags(struct xen_netbk *netbk, struct sk_buff *skb)
{
struct skb_shared_info *shinfo = skb_shinfo(skb);
int nr_frags = shinfo->nr_frags;
@@ -1359,7 +1359,7 @@ static bool tx_credit_exceeded(struct xen_netif *netif, unsigned size)
return false;
}
-static unsigned net_tx_build_mops(struct xen_netbk *netbk)
+static unsigned xen_netbk_tx_build_mops(struct xen_netbk *netbk)
{
struct gnttab_map_grant_ref *mop;
struct sk_buff *skb;
@@ -1493,7 +1493,7 @@ static unsigned net_tx_build_mops(struct xen_netbk *netbk)
netbk->pending_cons++;
- mop = netbk_get_requests(netbk, netif, skb, txfrags, mop);
+ mop = xen_netbk_get_requests(netbk, netif, skb, txfrags, mop);
netif->tx.req_cons = idx;
netif_schedule_work(netif);
@@ -1505,7 +1505,7 @@ static unsigned net_tx_build_mops(struct xen_netbk *netbk)
return mop - netbk->tx_map_ops;
}
-static void net_tx_submit(struct xen_netbk *netbk)
+static void xen_netbk_tx_submit(struct xen_netbk *netbk)
{
struct gnttab_map_grant_ref *mop;
struct sk_buff *skb;
@@ -1522,7 +1522,7 @@ static void net_tx_submit(struct xen_netbk *netbk)
txp = &netbk->pending_tx_info[pending_idx].req;
/* Check the remap error code. */
- if (unlikely(netbk_tx_check_mop(netbk, skb, &mop))) {
+ if (unlikely(xen_netbk_tx_check_mop(netbk, skb, &mop))) {
pr_debug("netback grant failed.\n");
skb_shinfo(skb)->nr_frags = 0;
kfree_skb(skb);
@@ -1539,7 +1539,7 @@ static void net_tx_submit(struct xen_netbk *netbk)
txp->size -= data_len;
} else {
/* Schedule a response immediately. */
- netif_idx_release(netbk, pending_idx);
+ xen_netbk_idx_release(netbk, pending_idx);
}
if (txp->flags & XEN_NETTXF_csum_blank)
@@ -1547,7 +1547,7 @@ static void net_tx_submit(struct xen_netbk *netbk)
else if (txp->flags & XEN_NETTXF_data_validated)
skb->ip_summed = CHECKSUM_UNNECESSARY;
- netbk_fill_frags(netbk, skb);
+ xen_netbk_fill_frags(netbk, skb);
/*
* If the initial fragment was < PKT_PROT_LEN then
@@ -1584,7 +1584,7 @@ static void net_tx_submit(struct xen_netbk *netbk)
}
/* Called after netfront has transmitted */
-static void net_tx_action(unsigned long data)
+static void xen_netbk_tx_action(unsigned long data)
{
struct xen_netbk *netbk = (struct xen_netbk *)data;
unsigned nr_mops;
@@ -1592,7 +1592,7 @@ static void net_tx_action(unsigned long data)
net_tx_action_dealloc(netbk);
- nr_mops = net_tx_build_mops(netbk);
+ nr_mops = xen_netbk_tx_build_mops(netbk);
if (nr_mops == 0)
goto out;
@@ -1601,7 +1601,7 @@ static void net_tx_action(unsigned long data)
netbk->tx_map_ops, nr_mops);
BUG_ON(ret);
- net_tx_submit(netbk);
+ xen_netbk_tx_submit(netbk);
out:
if (netbk_copy_skb_mode == NETBK_DELAYED_COPY_SKB &&
!list_empty(&netbk->pending_inuse_head)) {
@@ -1614,7 +1614,7 @@ out:
}
}
-static void netif_idx_release(struct xen_netbk *netbk, u16 pending_idx)
+static void xen_netbk_idx_release(struct xen_netbk *netbk, u16 pending_idx)
{
static DEFINE_SPINLOCK(_lock);
unsigned long flags;
@@ -1634,12 +1634,12 @@ static void netif_idx_release(struct xen_netbk *netbk, u16 pending_idx)
static void netif_page_release(struct page *page, unsigned int order)
{
unsigned int group, idx;
- int foreign = netif_get_page_ext(page, &group, &idx);
+ int foreign = get_page_ext(page, &group, &idx);
BUG_ON(!foreign);
BUG_ON(order);
- netif_idx_release(&xen_netbk[group], idx);
+ xen_netbk_idx_release(&xen_netbk[group], idx);
}
irqreturn_t netif_be_int(int irq, void *dev_id)
@@ -1726,7 +1726,7 @@ static inline int tx_work_todo(struct xen_netbk *netbk)
return 0;
}
-static int netbk_action_thread(void *data)
+static int xen_netbk_action_thread(void *data)
{
struct xen_netbk *netbk = (struct xen_netbk *)data;
while (!kthread_should_stop()) {
@@ -1740,10 +1740,10 @@ static int netbk_action_thread(void *data)
break;
if (rx_work_todo(netbk))
- net_rx_action((unsigned long)netbk);
+ xen_netbk_rx_action((unsigned long)netbk);
if (tx_work_todo(netbk))
- net_tx_action((unsigned long)netbk);
+ xen_netbk_tx_action((unsigned long)netbk);
}
return 0;
@@ -1794,7 +1794,7 @@ static int __init netback_init(void)
for (i = 0; i < MAX_PENDING_REQS; i++) {
page = netbk->mmap_pages[i];
SetPageForeign(page, netif_page_release);
- netif_set_page_ext(page, group, i);
+ set_page_ext(page, group, i);
INIT_LIST_HEAD(&netbk->pending_inuse[i].list);
}
@@ -1806,7 +1806,7 @@ static int __init netback_init(void)
if (MODPARM_netback_kthread) {
init_waitqueue_head(&netbk->kthread.netbk_action_wq);
netbk->kthread.task =
- kthread_create(netbk_action_thread,
+ kthread_create(xen_netbk_action_thread,
(void *)netbk,
"netback/%u", group);
@@ -1824,10 +1824,10 @@ static int __init netback_init(void)
}
} else {
tasklet_init(&netbk->tasklet.net_tx_tasklet,
- net_tx_action,
+ xen_netbk_tx_action,
(unsigned long)netbk);
tasklet_init(&netbk->tasklet.net_rx_tasklet,
- net_rx_action,
+ xen_netbk_rx_action,
(unsigned long)netbk);
}
--
1.7.4
From 532b9510abeb41d7b37fe4d49d727db24c5e8178 Mon Sep 17 00:00:00 2001
From: Ian Campbell <ian.campbell@citrix.com>
Date: Thu, 20 Jan 2011 17:00:54 +0000
Subject: [PATCH 166/197] xen: netback: refactor to make all xen_netbk knowledge internal to netback.c
Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
---
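[Editor's note, illustrative only: after this refactor common.h exposes just the two registration hooks below (taken verbatim from the hunk); struct xen_netbk, its rings and pending-request tables move into netback.c and become static there.]

    /* (De)Register a netif with the netback backend. */
    void xen_netbk_add_netif(struct xen_netif *netif);
    void xen_netbk_remove_netif(struct xen_netif *netif);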
drivers/net/xen-netback/common.h | 95 +---------------------------
drivers/net/xen-netback/interface.c | 28 +--------
drivers/net/xen-netback/netback.c | 117 ++++++++++++++++++++++++++++++++++-
3 files changed, 120 insertions(+), 120 deletions(-)
diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h
index 2d727a0..32a9e4f 100644
--- a/drivers/net/xen-netback/common.h
+++ b/drivers/net/xen-netback/common.h
@@ -177,97 +177,8 @@ static inline int netbk_can_sg(struct net_device *dev)
return netif->can_sg;
}
-struct pending_tx_info {
- struct xen_netif_tx_request req;
- struct xen_netif *netif;
-};
-typedef unsigned int pending_ring_idx_t;
-
-struct netbk_rx_meta {
- int id;
- int size;
- int gso_size;
-};
-
-struct netbk_tx_pending_inuse {
- struct list_head list;
- unsigned long alloc_time;
-};
-
-#define MAX_PENDING_REQS 256
-
-#define MAX_BUFFER_OFFSET PAGE_SIZE
-
-/* extra field used in struct page */
-union page_ext {
- struct {
-#if BITS_PER_LONG < 64
-#define IDX_WIDTH 8
-#define GROUP_WIDTH (BITS_PER_LONG - IDX_WIDTH)
- unsigned int group:GROUP_WIDTH;
- unsigned int idx:IDX_WIDTH;
-#else
- unsigned int group, idx;
-#endif
- } e;
- void *mapping;
-};
-
-struct xen_netbk {
- union {
- struct {
- struct tasklet_struct net_tx_tasklet;
- struct tasklet_struct net_rx_tasklet;
- } tasklet;
-
- struct {
- wait_queue_head_t netbk_action_wq;
- struct task_struct *task;
- } kthread;
- };
-
- struct sk_buff_head rx_queue;
- struct sk_buff_head tx_queue;
-
- struct timer_list net_timer;
- struct timer_list netbk_tx_pending_timer;
-
- struct page **mmap_pages;
-
- pending_ring_idx_t pending_prod;
- pending_ring_idx_t pending_cons;
- pending_ring_idx_t dealloc_prod;
- pending_ring_idx_t dealloc_cons;
-
- struct list_head pending_inuse_head;
- struct list_head net_schedule_list;
-
- /* Protect the net_schedule_list in netif. */
- spinlock_t net_schedule_list_lock;
-
- atomic_t netfront_count;
-
- struct pending_tx_info pending_tx_info[MAX_PENDING_REQS];
- struct netbk_tx_pending_inuse pending_inuse[MAX_PENDING_REQS];
- struct gnttab_unmap_grant_ref tx_unmap_ops[MAX_PENDING_REQS];
- struct gnttab_map_grant_ref tx_map_ops[MAX_PENDING_REQS];
-
- grant_handle_t grant_tx_handle[MAX_PENDING_REQS];
- u16 pending_ring[MAX_PENDING_REQS];
- u16 dealloc_ring[MAX_PENDING_REQS];
-
- /*
- * Each head or fragment can be up to 4096 bytes. Given
- * MAX_BUFFER_OFFSET of 4096 the worst case is that each
- * head/fragment uses 2 copy operation.
- */
- struct gnttab_copy grant_copy_op[2*NET_RX_RING_SIZE];
- unsigned char rx_notify[NR_IRQS];
- u16 notify_list[NET_RX_RING_SIZE];
- struct netbk_rx_meta meta[2*NET_RX_RING_SIZE];
-};
-
-extern struct xen_netbk *xen_netbk;
-extern int xen_netbk_group_nr;
+/* (De)Register a netif with the netback backend. */
+void xen_netbk_add_netif(struct xen_netif *netif);
+void xen_netbk_remove_netif(struct xen_netif *netif);
#endif /* __XEN_NETBACK__COMMON_H__ */
diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c
index 28b0832..54ae275 100644
--- a/drivers/net/xen-netback/interface.c
+++ b/drivers/net/xen-netback/interface.c
@@ -38,33 +38,9 @@
#define NETBK_QUEUE_LENGTH 32
-static void netbk_add_netif(struct xen_netbk *netbk, int group_nr,
- struct xen_netif *netif)
-{
- int i;
- int min_netfront_count;
- int min_group = 0;
- min_netfront_count = atomic_read(&netbk[0].netfront_count);
- for (i = 0; i < group_nr; i++) {
- int netfront_count = atomic_read(&netbk[i].netfront_count);
- if (netfront_count < min_netfront_count) {
- min_group = i;
- min_netfront_count = netfront_count;
- }
- }
-
- netif->group = min_group;
- atomic_inc(&netbk[netif->group].netfront_count);
-}
-
-static void netbk_remove_netif(struct xen_netbk *netbk, struct xen_netif *netif)
-{
- atomic_dec(&netbk[netif->group].netfront_count);
-}
-
static void __netif_up(struct xen_netif *netif)
{
- netbk_add_netif(xen_netbk, xen_netbk_group_nr, netif);
+ xen_netbk_add_netif(netif);
enable_irq(netif->irq);
netif_schedule_work(netif);
}
@@ -73,7 +49,7 @@ static void __netif_down(struct xen_netif *netif)
{
disable_irq(netif->irq);
netif_deschedule_work(netif);
- netbk_remove_netif(xen_netbk, netif);
+ xen_netbk_remove_netif(netif);
}
static int net_open(struct net_device *dev)
diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c
index 6ed4459..d10ddbc 100644
--- a/drivers/net/xen-netback/netback.c
+++ b/drivers/net/xen-netback/netback.c
@@ -47,8 +47,121 @@
#include <asm/xen/hypercall.h>
#include <asm/xen/page.h>
-struct xen_netbk *xen_netbk;
-int xen_netbk_group_nr;
+struct pending_tx_info {
+ struct xen_netif_tx_request req;
+ struct xen_netif *netif;
+};
+typedef unsigned int pending_ring_idx_t;
+
+struct netbk_rx_meta {
+ int id;
+ int size;
+ int gso_size;
+};
+
+struct netbk_tx_pending_inuse {
+ struct list_head list;
+ unsigned long alloc_time;
+};
+
+#define MAX_PENDING_REQS 256
+
+#define MAX_BUFFER_OFFSET PAGE_SIZE
+
+/* extra field used in struct page */
+union page_ext {
+ struct {
+#if BITS_PER_LONG < 64
+#define IDX_WIDTH 8
+#define GROUP_WIDTH (BITS_PER_LONG - IDX_WIDTH)
+ unsigned int group:GROUP_WIDTH;
+ unsigned int idx:IDX_WIDTH;
+#else
+ unsigned int group, idx;
+#endif
+ } e;
+ void *mapping;
+};
+
+struct xen_netbk {
+ union {
+ struct {
+ struct tasklet_struct net_tx_tasklet;
+ struct tasklet_struct net_rx_tasklet;
+ } tasklet;
+
+ struct {
+ wait_queue_head_t netbk_action_wq;
+ struct task_struct *task;
+ } kthread;
+ };
+
+ struct sk_buff_head rx_queue;
+ struct sk_buff_head tx_queue;
+
+ struct timer_list net_timer;
+ struct timer_list netbk_tx_pending_timer;
+
+ struct page **mmap_pages;
+
+ pending_ring_idx_t pending_prod;
+ pending_ring_idx_t pending_cons;
+ pending_ring_idx_t dealloc_prod;
+ pending_ring_idx_t dealloc_cons;
+
+ struct list_head pending_inuse_head;
+ struct list_head net_schedule_list;
+
+ /* Protect the net_schedule_list in netif. */
+ spinlock_t net_schedule_list_lock;
+
+ atomic_t netfront_count;
+
+ struct pending_tx_info pending_tx_info[MAX_PENDING_REQS];
+ struct netbk_tx_pending_inuse pending_inuse[MAX_PENDING_REQS];
+ struct gnttab_unmap_grant_ref tx_unmap_ops[MAX_PENDING_REQS];
+ struct gnttab_map_grant_ref tx_map_ops[MAX_PENDING_REQS];
+
+ grant_handle_t grant_tx_handle[MAX_PENDING_REQS];
+ u16 pending_ring[MAX_PENDING_REQS];
+ u16 dealloc_ring[MAX_PENDING_REQS];
+
+ /*
+ * Each head or fragment can be up to 4096 bytes. Given
+ * MAX_BUFFER_OFFSET of 4096 the worst case is that each
+ * head/fragment uses 2 copy operation.
+ */
+ struct gnttab_copy grant_copy_op[2*NET_RX_RING_SIZE];
+ unsigned char rx_notify[NR_IRQS];
+ u16 notify_list[NET_RX_RING_SIZE];
+ struct netbk_rx_meta meta[2*NET_RX_RING_SIZE];
+};
+
+static struct xen_netbk *xen_netbk;
+static int xen_netbk_group_nr;
+
+void xen_netbk_add_netif(struct xen_netif *netif)
+{
+ int i;
+ int min_netfront_count;
+ int min_group = 0;
+ min_netfront_count = atomic_read(&xen_netbk[0].netfront_count);
+ for (i = 0; i < xen_netbk_group_nr; i++) {
+ int netfront_count = atomic_read(&xen_netbk[i].netfront_count);
+ if (netfront_count < min_netfront_count) {
+ min_group = i;
+ min_netfront_count = netfront_count;
+ }
+ }
+
+ netif->group = min_group;
+ atomic_inc(&xen_netbk[netif->group].netfront_count);
+}
+
+void xen_netbk_remove_netif(struct xen_netif *netif)
+{
+ atomic_dec(&xen_netbk[netif->group].netfront_count);
+}
static void xen_netbk_idx_release(struct xen_netbk *netbk, u16 pending_idx);
static void make_tx_response(struct xen_netif *netif,
--
1.7.4
From 650e09296e647d27284096af91e8bb5abf8c8662 Mon Sep 17 00:00:00 2001
From: Ian Campbell <ian.campbell@citrix.com>
Date: Thu, 20 Jan 2011 17:08:23 +0000
Subject: [PATCH 167/197] xen: netback: use xenvif_ prefix where appropriate
Avoids use of the netif_ prefix, which belongs to the networking core.
Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
---
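[Editor's note, illustrative summary only: representative renames, all taken from the hunks in this patch.]

    struct xen_netif          -> struct xenvif
    netif_alloc()             -> xenvif_alloc()
    netif_map()               -> xenvif_connect()
    netif_disconnect()        -> xenvif_disconnect()
    netif_be_start_xmit()     -> xenvif_start_xmit()
    netif_be_get_stats()      -> xenvif_get_stats()
    netif_be_int()            -> xenvif_interrupt()
    netif_schedule_work()     -> xenvif_schedule_work()
    netback_carrier_on/off()  -> xenvif_carrier_on/off()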
drivers/net/xen-netback/common.h | 65 +++---
drivers/net/xen-netback/interface.c | 316 ++++++++++++++--------------
drivers/net/xen-netback/netback.c | 394 +++++++++++++++++------------------
drivers/net/xen-netback/xenbus.c | 72 +++----
4 files changed, 422 insertions(+), 425 deletions(-)
diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h
index 32a9e4f..f6da94b 100644
--- a/drivers/net/xen-netback/common.h
+++ b/drivers/net/xen-netback/common.h
@@ -46,7 +46,7 @@
#include <xen/grant_table.h>
#include <xen/xenbus.h>
-struct xen_netif {
+struct xenvif {
/* Unique identifier for this interface. */
domid_t domid;
int group;
@@ -79,7 +79,7 @@ struct xen_netif {
/* Internal feature information. */
u8 can_queue:1; /* can queue packets for receiver? */
- /* Allow netif_be_start_xmit() to peek ahead in the rx request
+ /* Allow xenvif_start_xmit() to peek ahead in the rx request
* ring. This is a prediction of what rx_req_cons will be once
* all queued skbs are put on the ring. */
RING_IDX rx_req_cons_peek;
@@ -111,9 +111,9 @@ struct xen_netif {
* immediately be called, which can cause packet loss; also the etherbridge
* can be rather lazy in activating its port).
*/
-#define netback_carrier_on(netif) ((netif)->carrier = 1)
-#define netback_carrier_off(netif) ((netif)->carrier = 0)
-#define netback_carrier_ok(netif) ((netif)->carrier)
+#define xenvif_carrier_on(xenvif) ((xenvif)->carrier = 1)
+#define xenvif_carrier_off(xenvif) ((xenvif)->carrier = 0)
+#define xenvif_carrier_ok(xenvif) ((xenvif)->carrier)
enum {
NETBK_DONT_COPY_SKB,
@@ -125,7 +125,7 @@ extern int netbk_copy_skb_mode;
struct backend_info {
struct xenbus_device *dev;
- struct xen_netif *netif;
+ struct xenvif *vif;
enum xenbus_state frontend_state;
struct xenbus_watch hotplug_status_watch;
int have_hotplug_status_watch:1;
@@ -134,51 +134,52 @@ struct backend_info {
#define NET_TX_RING_SIZE __RING_SIZE((struct xen_netif_tx_sring *)0, PAGE_SIZE)
#define NET_RX_RING_SIZE __RING_SIZE((struct xen_netif_rx_sring *)0, PAGE_SIZE)
-void netif_disconnect(struct xen_netif *netif);
+void xenvif_disconnect(struct xenvif *vif);
-void netif_set_features(struct xen_netif *netif);
-struct xen_netif *netif_alloc(struct device *parent, domid_t domid,
- unsigned int handle);
-int netif_map(struct xen_netif *netif, unsigned long tx_ring_ref,
- unsigned long rx_ring_ref, unsigned int evtchn);
+void xenvif_set_features(struct xenvif *vif);
+struct xenvif *xenvif_alloc(struct device *parent,
+ domid_t domid,
+ unsigned int handle);
+int xenvif_connect(struct xenvif *vif, unsigned long tx_ring_ref,
+ unsigned long rx_ring_ref, unsigned int evtchn);
-static inline void netif_get(struct xen_netif *netif)
+static inline void xenvif_get(struct xenvif *vif)
{
- atomic_inc(&netif->refcnt);
+ atomic_inc(&vif->refcnt);
}
-static inline void netif_put(struct xen_netif *netif)
+static inline void xenvif_put(struct xenvif *vif)
{
- if (atomic_dec_and_test(&netif->refcnt))
- wake_up(&netif->waiting_to_free);
+ if (atomic_dec_and_test(&vif->refcnt))
+ wake_up(&vif->waiting_to_free);
}
-int netif_xenbus_init(void);
+int xenvif_xenbus_init(void);
-#define netif_schedulable(netif) \
- (netif_running((netif)->dev) && netback_carrier_ok(netif))
+#define xenvif_schedulable(vif) \
+ (netif_running((vif)->dev) && xenvif_carrier_ok(vif))
-void netif_schedule_work(struct xen_netif *netif);
-void netif_deschedule_work(struct xen_netif *netif);
+void xenvif_schedule_work(struct xenvif *vif);
+void xenvif_deschedule_work(struct xenvif *vif);
-int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev);
-struct net_device_stats *netif_be_get_stats(struct net_device *dev);
-irqreturn_t netif_be_int(int irq, void *dev_id);
+int xenvif_start_xmit(struct sk_buff *skb, struct net_device *dev);
+struct net_device_stats *xenvif_get_stats(struct net_device *dev);
+irqreturn_t xenvif_interrupt(int irq, void *dev_id);
static inline int netbk_can_queue(struct net_device *dev)
{
- struct xen_netif *netif = netdev_priv(dev);
- return netif->can_queue;
+ struct xenvif *vif = netdev_priv(dev);
+ return vif->can_queue;
}
static inline int netbk_can_sg(struct net_device *dev)
{
- struct xen_netif *netif = netdev_priv(dev);
- return netif->can_sg;
+ struct xenvif *vif = netdev_priv(dev);
+ return vif->can_sg;
}
-/* (De)Register a netif with the netback backend. */
-void xen_netbk_add_netif(struct xen_netif *netif);
-void xen_netbk_remove_netif(struct xen_netif *netif);
+/* (De)Register a xenvif with the netback backend. */
+void xen_netbk_add_xenvif(struct xenvif *vif);
+void xen_netbk_remove_xenvif(struct xenvif *vif);
#endif /* __XEN_NETBACK__COMMON_H__ */
diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c
index 54ae275..a065173 100644
--- a/drivers/net/xen-netback/interface.c
+++ b/drivers/net/xen-netback/interface.c
@@ -38,40 +38,40 @@
#define NETBK_QUEUE_LENGTH 32
-static void __netif_up(struct xen_netif *netif)
+static void xenvif_up(struct xenvif *vif)
{
- xen_netbk_add_netif(netif);
- enable_irq(netif->irq);
- netif_schedule_work(netif);
+ xen_netbk_add_xenvif(vif);
+ enable_irq(vif->irq);
+ xenvif_schedule_work(vif);
}
-static void __netif_down(struct xen_netif *netif)
+static void xenvif_down(struct xenvif *vif)
{
- disable_irq(netif->irq);
- netif_deschedule_work(netif);
- xen_netbk_remove_netif(netif);
+ disable_irq(vif->irq);
+ xenvif_deschedule_work(vif);
+ xen_netbk_remove_xenvif(vif);
}
-static int net_open(struct net_device *dev)
+static int xenvif_open(struct net_device *dev)
{
- struct xen_netif *netif = netdev_priv(dev);
- if (netback_carrier_ok(netif)) {
- __netif_up(netif);
+ struct xenvif *vif = netdev_priv(dev);
+ if (xenvif_carrier_ok(vif)) {
+ xenvif_up(vif);
netif_start_queue(dev);
}
return 0;
}
-static int net_close(struct net_device *dev)
+static int xenvif_close(struct net_device *dev)
{
- struct xen_netif *netif = netdev_priv(dev);
- if (netback_carrier_ok(netif))
- __netif_down(netif);
+ struct xenvif *vif = netdev_priv(dev);
+ if (xenvif_carrier_ok(vif))
+ xenvif_down(vif);
netif_stop_queue(dev);
return 0;
}
-static int netbk_change_mtu(struct net_device *dev, int mtu)
+static int xenvif_change_mtu(struct net_device *dev, int mtu)
{
int max = netbk_can_sg(dev) ? 65535 - ETH_HLEN : ETH_DATA_LEN;
@@ -81,19 +81,19 @@ static int netbk_change_mtu(struct net_device *dev, int mtu)
return 0;
}
-void netif_set_features(struct xen_netif *netif)
+void xenvif_set_features(struct xenvif *vif)
{
- struct net_device *dev = netif->dev;
+ struct net_device *dev = vif->dev;
int features = dev->features;
- if (netif->can_sg)
+ if (vif->can_sg)
features |= NETIF_F_SG;
- if (netif->gso || netif->gso_prefix)
+ if (vif->gso || vif->gso_prefix)
features |= NETIF_F_TSO;
- if (netif->csum)
+ if (vif->csum)
features |= NETIF_F_IP_CSUM;
- features &= ~(netif->features_disabled);
+ features &= ~(vif->features_disabled);
if (!(features & NETIF_F_SG) && dev->mtu > ETH_DATA_LEN)
dev->mtu = ETH_DATA_LEN;
@@ -101,130 +101,130 @@ void netif_set_features(struct xen_netif *netif)
dev->features = features;
}
-static int netbk_set_tx_csum(struct net_device *dev, u32 data)
+static int xenvif_set_tx_csum(struct net_device *dev, u32 data)
{
- struct xen_netif *netif = netdev_priv(dev);
+ struct xenvif *vif = netdev_priv(dev);
if (data) {
- if (!netif->csum)
+ if (!vif->csum)
return -EOPNOTSUPP;
- netif->features_disabled &= ~NETIF_F_IP_CSUM;
+ vif->features_disabled &= ~NETIF_F_IP_CSUM;
} else {
- netif->features_disabled |= NETIF_F_IP_CSUM;
+ vif->features_disabled |= NETIF_F_IP_CSUM;
}
- netif_set_features(netif);
+ xenvif_set_features(vif);
return 0;
}
-static int netbk_set_sg(struct net_device *dev, u32 data)
+static int xenvif_set_sg(struct net_device *dev, u32 data)
{
- struct xen_netif *netif = netdev_priv(dev);
+ struct xenvif *vif = netdev_priv(dev);
if (data) {
- if (!netif->can_sg)
+ if (!vif->can_sg)
return -EOPNOTSUPP;
- netif->features_disabled &= ~NETIF_F_SG;
+ vif->features_disabled &= ~NETIF_F_SG;
} else {
- netif->features_disabled |= NETIF_F_SG;
+ vif->features_disabled |= NETIF_F_SG;
}
- netif_set_features(netif);
+ xenvif_set_features(vif);
return 0;
}
-static int netbk_set_tso(struct net_device *dev, u32 data)
+static int xenvif_set_tso(struct net_device *dev, u32 data)
{
- struct xen_netif *netif = netdev_priv(dev);
+ struct xenvif *vif = netdev_priv(dev);
if (data) {
- if (!netif->gso && !netif->gso_prefix)
+ if (!vif->gso && !vif->gso_prefix)
return -EOPNOTSUPP;
- netif->features_disabled &= ~NETIF_F_TSO;
+ vif->features_disabled &= ~NETIF_F_TSO;
} else {
- netif->features_disabled |= NETIF_F_TSO;
+ vif->features_disabled |= NETIF_F_TSO;
}
- netif_set_features(netif);
+ xenvif_set_features(vif);
return 0;
}
-static const struct netif_stat {
+static const struct xenvif_stat {
char name[ETH_GSTRING_LEN];
u16 offset;
-} netbk_stats[] = {
+} xenvif_stats[] = {
{
"copied_skbs",
- offsetof(struct xen_netif, nr_copied_skbs)
+ offsetof(struct xenvif, nr_copied_skbs)
},
{
"rx_gso_checksum_fixup",
- offsetof(struct xen_netif, rx_gso_checksum_fixup)
+ offsetof(struct xenvif, rx_gso_checksum_fixup)
},
};
-static int netbk_get_sset_count(struct net_device *dev, int string_set)
+static int xenvif_get_sset_count(struct net_device *dev, int string_set)
{
switch (string_set) {
case ETH_SS_STATS:
- return ARRAY_SIZE(netbk_stats);
+ return ARRAY_SIZE(xenvif_stats);
default:
return -EINVAL;
}
}
-static void netbk_get_ethtool_stats(struct net_device *dev,
- struct ethtool_stats *stats, u64 * data)
+static void xenvif_get_ethtool_stats(struct net_device *dev,
+ struct ethtool_stats *stats, u64 * data)
{
- void *netif = netdev_priv(dev);
+ void *vif = netdev_priv(dev);
int i;
- for (i = 0; i < ARRAY_SIZE(netbk_stats); i++)
- data[i] = *(int *)(netif + netbk_stats[i].offset);
+ for (i = 0; i < ARRAY_SIZE(xenvif_stats); i++)
+ data[i] = *(int *)(vif + xenvif_stats[i].offset);
}
-static void netbk_get_strings(struct net_device *dev, u32 stringset, u8 * data)
+static void xenvif_get_strings(struct net_device *dev, u32 stringset, u8 * data)
{
int i;
switch (stringset) {
case ETH_SS_STATS:
- for (i = 0; i < ARRAY_SIZE(netbk_stats); i++)
+ for (i = 0; i < ARRAY_SIZE(xenvif_stats); i++)
memcpy(data + i * ETH_GSTRING_LEN,
- netbk_stats[i].name, ETH_GSTRING_LEN);
+ xenvif_stats[i].name, ETH_GSTRING_LEN);
break;
}
}
-static struct ethtool_ops network_ethtool_ops = {
- .get_tx_csum = ethtool_op_get_tx_csum,
- .set_tx_csum = netbk_set_tx_csum,
- .get_sg = ethtool_op_get_sg,
- .set_sg = netbk_set_sg,
- .get_tso = ethtool_op_get_tso,
- .set_tso = netbk_set_tso,
- .get_link = ethtool_op_get_link,
-
- .get_sset_count = netbk_get_sset_count,
- .get_ethtool_stats = netbk_get_ethtool_stats,
- .get_strings = netbk_get_strings,
+static struct ethtool_ops xenvif_ethtool_ops = {
+ .get_tx_csum = ethtool_op_get_tx_csum,
+ .set_tx_csum = xenvif_set_tx_csum,
+ .get_sg = ethtool_op_get_sg,
+ .set_sg = xenvif_set_sg,
+ .get_tso = ethtool_op_get_tso,
+ .set_tso = xenvif_set_tso,
+ .get_link = ethtool_op_get_link,
+
+ .get_sset_count = xenvif_get_sset_count,
+ .get_ethtool_stats = xenvif_get_ethtool_stats,
+ .get_strings = xenvif_get_strings,
};
-static struct net_device_ops netback_ops = {
- .ndo_start_xmit = netif_be_start_xmit,
- .ndo_get_stats = netif_be_get_stats,
- .ndo_open = net_open,
- .ndo_stop = net_close,
- .ndo_change_mtu = netbk_change_mtu,
+static struct net_device_ops xenvif_netdev_ops = {
+ .ndo_start_xmit = xenvif_start_xmit,
+ .ndo_get_stats = xenvif_get_stats,
+ .ndo_open = xenvif_open,
+ .ndo_stop = xenvif_close,
+ .ndo_change_mtu = xenvif_change_mtu,
};
-struct xen_netif *netif_alloc(struct device *parent, domid_t domid,
- unsigned int handle)
+struct xenvif *xenvif_alloc(struct device *parent, domid_t domid,
+ unsigned int handle)
{
int err = 0;
struct net_device *dev;
- struct xen_netif *netif;
+ struct xenvif *vif;
char name[IFNAMSIZ] = {};
snprintf(name, IFNAMSIZ - 1, "vif%u.%u", domid, handle);
- dev = alloc_netdev(sizeof(struct xen_netif), name, ether_setup);
+ dev = alloc_netdev(sizeof(struct xenvif), name, ether_setup);
if (dev == NULL) {
pr_debug("Could not allocate netdev\n");
return ERR_PTR(-ENOMEM);
@@ -232,29 +232,29 @@ struct xen_netif *netif_alloc(struct device *parent, domid_t domid,
SET_NETDEV_DEV(dev, parent);
- netif = netdev_priv(dev);
- memset(netif, 0, sizeof(*netif));
- netif->domid = domid;
- netif->group = -1;
- netif->handle = handle;
- netif->can_sg = 1;
- netif->csum = 1;
- atomic_set(&netif->refcnt, 1);
- init_waitqueue_head(&netif->waiting_to_free);
- netif->dev = dev;
- INIT_LIST_HEAD(&netif->list);
-
- netback_carrier_off(netif);
-
- netif->credit_bytes = netif->remaining_credit = ~0UL;
- netif->credit_usec = 0UL;
- init_timer(&netif->credit_timeout);
+ vif = netdev_priv(dev);
+ memset(vif, 0, sizeof(*vif));
+ vif->domid = domid;
+ vif->group = -1;
+ vif->handle = handle;
+ vif->can_sg = 1;
+ vif->csum = 1;
+ atomic_set(&vif->refcnt, 1);
+ init_waitqueue_head(&vif->waiting_to_free);
+ vif->dev = dev;
+ INIT_LIST_HEAD(&vif->list);
+
+ xenvif_carrier_off(vif);
+
+ vif->credit_bytes = vif->remaining_credit = ~0UL;
+ vif->credit_usec = 0UL;
+ init_timer(&vif->credit_timeout);
/* Initialize 'expires' now: it's used to track the credit window. */
- netif->credit_timeout.expires = jiffies;
+ vif->credit_timeout.expires = jiffies;
- dev->netdev_ops = &netback_ops;
- netif_set_features(netif);
- SET_ETHTOOL_OPS(dev, &network_ethtool_ops);
+ dev->netdev_ops = &xenvif_netdev_ops;
+ xenvif_set_features(vif);
+ SET_ETHTOOL_OPS(dev, &xenvif_ethtool_ops);
dev->tx_queue_len = NETBK_QUEUE_LENGTH;
@@ -277,18 +277,18 @@ struct xen_netif *netif_alloc(struct device *parent, domid_t domid,
return ERR_PTR(err);
}
- pr_debug("Successfully created netif\n");
- return netif;
+ pr_debug("Successfully created xenvif\n");
+ return vif;
}
-static int map_frontend_pages(struct xen_netif *netif,
+static int map_frontend_pages(struct xenvif *vif,
grant_ref_t tx_ring_ref,
grant_ref_t rx_ring_ref)
{
struct gnttab_map_grant_ref op;
- gnttab_set_map_op(&op, (unsigned long)netif->tx_comms_area->addr,
- GNTMAP_host_map, tx_ring_ref, netif->domid);
+ gnttab_set_map_op(&op, (unsigned long)vif->tx_comms_area->addr,
+ GNTMAP_host_map, tx_ring_ref, vif->domid);
if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1))
BUG();
@@ -298,11 +298,11 @@ static int map_frontend_pages(struct xen_netif *netif,
return op.status;
}
- netif->tx_shmem_ref = tx_ring_ref;
- netif->tx_shmem_handle = op.handle;
+ vif->tx_shmem_ref = tx_ring_ref;
+ vif->tx_shmem_handle = op.handle;
- gnttab_set_map_op(&op, (unsigned long)netif->rx_comms_area->addr,
- GNTMAP_host_map, rx_ring_ref, netif->domid);
+ gnttab_set_map_op(&op, (unsigned long)vif->rx_comms_area->addr,
+ GNTMAP_host_map, rx_ring_ref, vif->domid);
if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1))
BUG();
@@ -311,120 +311,120 @@ static int map_frontend_pages(struct xen_netif *netif,
struct gnttab_unmap_grant_ref unop;
gnttab_set_unmap_op(&unop,
- (unsigned long)netif->tx_comms_area->addr,
- GNTMAP_host_map, netif->tx_shmem_handle);
+ (unsigned long)vif->tx_comms_area->addr,
+ GNTMAP_host_map, vif->tx_shmem_handle);
HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &unop, 1);
pr_debug("Gnttab failure mapping rx_ring_ref!\n");
return op.status;
}
- netif->rx_shmem_ref = rx_ring_ref;
- netif->rx_shmem_handle = op.handle;
+ vif->rx_shmem_ref = rx_ring_ref;
+ vif->rx_shmem_handle = op.handle;
return 0;
}
-static void unmap_frontend_pages(struct xen_netif *netif)
+static void unmap_frontend_pages(struct xenvif *vif)
{
struct gnttab_unmap_grant_ref op;
- gnttab_set_unmap_op(&op, (unsigned long)netif->tx_comms_area->addr,
- GNTMAP_host_map, netif->tx_shmem_handle);
+ gnttab_set_unmap_op(&op, (unsigned long)vif->tx_comms_area->addr,
+ GNTMAP_host_map, vif->tx_shmem_handle);
if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1))
BUG();
- gnttab_set_unmap_op(&op, (unsigned long)netif->rx_comms_area->addr,
- GNTMAP_host_map, netif->rx_shmem_handle);
+ gnttab_set_unmap_op(&op, (unsigned long)vif->rx_comms_area->addr,
+ GNTMAP_host_map, vif->rx_shmem_handle);
if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1))
BUG();
}
-int netif_map(struct xen_netif *netif, unsigned long tx_ring_ref,
- unsigned long rx_ring_ref, unsigned int evtchn)
+int xenvif_connect(struct xenvif *vif, unsigned long tx_ring_ref,
+ unsigned long rx_ring_ref, unsigned int evtchn)
{
int err = -ENOMEM;
struct xen_netif_tx_sring *txs;
struct xen_netif_rx_sring *rxs;
/* Already connected through? */
- if (netif->irq)
+ if (vif->irq)
return 0;
- netif->tx_comms_area = alloc_vm_area(PAGE_SIZE);
- if (netif->tx_comms_area == NULL)
+ vif->tx_comms_area = alloc_vm_area(PAGE_SIZE);
+ if (vif->tx_comms_area == NULL)
return -ENOMEM;
- netif->rx_comms_area = alloc_vm_area(PAGE_SIZE);
- if (netif->rx_comms_area == NULL)
+ vif->rx_comms_area = alloc_vm_area(PAGE_SIZE);
+ if (vif->rx_comms_area == NULL)
goto err_rx;
- err = map_frontend_pages(netif, tx_ring_ref, rx_ring_ref);
+ err = map_frontend_pages(vif, tx_ring_ref, rx_ring_ref);
if (err)
goto err_map;
err = bind_interdomain_evtchn_to_irqhandler(
- netif->domid, evtchn, netif_be_int, 0,
- netif->dev->name, netif);
+ vif->domid, evtchn, xenvif_interrupt, 0,
+ vif->dev->name, vif);
if (err < 0)
goto err_hypervisor;
- netif->irq = err;
- disable_irq(netif->irq);
+ vif->irq = err;
+ disable_irq(vif->irq);
- txs = (struct xen_netif_tx_sring *)netif->tx_comms_area->addr;
- BACK_RING_INIT(&netif->tx, txs, PAGE_SIZE);
+ txs = (struct xen_netif_tx_sring *)vif->tx_comms_area->addr;
+ BACK_RING_INIT(&vif->tx, txs, PAGE_SIZE);
rxs = (struct xen_netif_rx_sring *)
- ((char *)netif->rx_comms_area->addr);
- BACK_RING_INIT(&netif->rx, rxs, PAGE_SIZE);
+ ((char *)vif->rx_comms_area->addr);
+ BACK_RING_INIT(&vif->rx, rxs, PAGE_SIZE);
- netif->rx_req_cons_peek = 0;
+ vif->rx_req_cons_peek = 0;
- netif_get(netif);
+ xenvif_get(vif);
rtnl_lock();
- netback_carrier_on(netif);
- if (netif_running(netif->dev))
- __netif_up(netif);
+ xenvif_carrier_on(vif);
+ if (netif_running(vif->dev))
+ xenvif_up(vif);
rtnl_unlock();
return 0;
err_hypervisor:
- unmap_frontend_pages(netif);
+ unmap_frontend_pages(vif);
err_map:
- free_vm_area(netif->rx_comms_area);
+ free_vm_area(vif->rx_comms_area);
err_rx:
- free_vm_area(netif->tx_comms_area);
+ free_vm_area(vif->tx_comms_area);
return err;
}
-void netif_disconnect(struct xen_netif *netif)
+void xenvif_disconnect(struct xenvif *vif)
{
- if (netback_carrier_ok(netif)) {
+ if (xenvif_carrier_ok(vif)) {
rtnl_lock();
- netback_carrier_off(netif);
- netif_carrier_off(netif->dev); /* discard queued packets */
- if (netif_running(netif->dev))
- __netif_down(netif);
+ xenvif_carrier_off(vif);
+ netif_carrier_off(vif->dev); /* discard queued packets */
+ if (netif_running(vif->dev))
+ xenvif_down(vif);
rtnl_unlock();
- netif_put(netif);
+ xenvif_put(vif);
}
- atomic_dec(&netif->refcnt);
- wait_event(netif->waiting_to_free, atomic_read(&netif->refcnt) == 0);
+ atomic_dec(&vif->refcnt);
+ wait_event(vif->waiting_to_free, atomic_read(&vif->refcnt) == 0);
- del_timer_sync(&netif->credit_timeout);
+ del_timer_sync(&vif->credit_timeout);
- if (netif->irq)
- unbind_from_irqhandler(netif->irq, netif);
+ if (vif->irq)
+ unbind_from_irqhandler(vif->irq, vif);
- unregister_netdev(netif->dev);
+ unregister_netdev(vif->dev);
- if (netif->tx.sring) {
- unmap_frontend_pages(netif);
- free_vm_area(netif->tx_comms_area);
- free_vm_area(netif->rx_comms_area);
+ if (vif->tx.sring) {
+ unmap_frontend_pages(vif);
+ free_vm_area(vif->tx_comms_area);
+ free_vm_area(vif->rx_comms_area);
}
- free_netdev(netif->dev);
+ free_netdev(vif->dev);
}
diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c
index d10ddbc..f2f9c6f 100644
--- a/drivers/net/xen-netback/netback.c
+++ b/drivers/net/xen-netback/netback.c
@@ -49,7 +49,7 @@
struct pending_tx_info {
struct xen_netif_tx_request req;
- struct xen_netif *netif;
+ struct xenvif *vif;
};
typedef unsigned int pending_ring_idx_t;
@@ -140,7 +140,7 @@ struct xen_netbk {
static struct xen_netbk *xen_netbk;
static int xen_netbk_group_nr;
-void xen_netbk_add_netif(struct xen_netif *netif)
+void xen_netbk_add_xenvif(struct xenvif *vif)
{
int i;
int min_netfront_count;
@@ -154,20 +154,20 @@ void xen_netbk_add_netif(struct xen_netif *netif)
}
}
- netif->group = min_group;
- atomic_inc(&xen_netbk[netif->group].netfront_count);
+ vif->group = min_group;
+ atomic_inc(&xen_netbk[vif->group].netfront_count);
}
-void xen_netbk_remove_netif(struct xen_netif *netif)
+void xen_netbk_remove_xenvif(struct xenvif *vif)
{
- atomic_dec(&xen_netbk[netif->group].netfront_count);
+ atomic_dec(&xen_netbk[vif->group].netfront_count);
}
static void xen_netbk_idx_release(struct xen_netbk *netbk, u16 pending_idx);
-static void make_tx_response(struct xen_netif *netif,
+static void make_tx_response(struct xenvif *vif,
struct xen_netif_tx_request *txp,
s8 st);
-static struct xen_netif_rx_response *make_rx_response(struct xen_netif *netif,
+static struct xen_netif_rx_response *make_rx_response(struct xenvif *vif,
u16 id,
s8 st,
u16 offset,
@@ -369,20 +369,20 @@ static struct sk_buff *netbk_copy_skb(struct sk_buff *skb)
return NULL;
}
-static inline int netbk_max_required_rx_slots(struct xen_netif *netif)
+static inline int xenvif_max_required_rx_slots(struct xenvif *vif)
{
- if (netif->can_sg || netif->gso || netif->gso_prefix)
+ if (vif->can_sg || vif->gso || vif->gso_prefix)
return MAX_SKB_FRAGS + 2; /* header + extra_info + frags */
return 1; /* all in one */
}
-static inline int netbk_queue_full(struct xen_netif *netif)
+static inline int xenvif_queue_full(struct xenvif *vif)
{
- RING_IDX peek = netif->rx_req_cons_peek;
- RING_IDX needed = netbk_max_required_rx_slots(netif);
+ RING_IDX peek = vif->rx_req_cons_peek;
+ RING_IDX needed = xenvif_max_required_rx_slots(vif);
- return ((netif->rx.sring->req_prod - peek) < needed) ||
- ((netif->rx.rsp_prod_pvt + NET_RX_RING_SIZE - peek) < needed);
+ return ((vif->rx.sring->req_prod - peek) < needed) ||
+ ((vif->rx.rsp_prod_pvt + NET_RX_RING_SIZE - peek) < needed);
}
/*
@@ -430,7 +430,7 @@ static bool start_new_rx_buffer(int offset, unsigned long size, int head)
* the guest. This function is essentially a dry run of
* netbk_gop_frag_copy.
*/
-static unsigned int count_skb_slots(struct sk_buff *skb, struct xen_netif *netif)
+static unsigned int count_skb_slots(struct sk_buff *skb, struct xenvif *vif)
{
unsigned int count = 1;
int i, copy_off = 0;
@@ -464,20 +464,20 @@ static unsigned int count_skb_slots(struct sk_buff *skb, struct xen_netif *netif
return count;
}
-int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev)
+int xenvif_start_xmit(struct sk_buff *skb, struct net_device *dev)
{
- struct xen_netif *netif = netdev_priv(dev);
+ struct xenvif *vif = netdev_priv(dev);
struct xen_netbk *netbk;
BUG_ON(skb->dev != dev);
- if (netif->group == -1)
+ if (vif->group == -1)
goto drop;
- netbk = &xen_netbk[netif->group];
+ netbk = &xen_netbk[vif->group];
/* Drop the packet if the target domain has no receive buffers. */
- if (unlikely(!netif_schedulable(netif) || netbk_queue_full(netif)))
+ if (unlikely(!xenvif_schedulable(vif) || xenvif_queue_full(vif)))
goto drop;
/*
@@ -496,14 +496,14 @@ int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev)
}
/* Reserve ring slots for the worst-case number of fragments. */
- netif->rx_req_cons_peek += count_skb_slots(skb, netif);
- netif_get(netif);
+ vif->rx_req_cons_peek += count_skb_slots(skb, vif);
+ xenvif_get(vif);
- if (netbk_can_queue(dev) && netbk_queue_full(netif)) {
- netif->rx.sring->req_event = netif->rx_req_cons_peek +
- netbk_max_required_rx_slots(netif);
+ if (netbk_can_queue(dev) && xenvif_queue_full(vif)) {
+ vif->rx.sring->req_event = vif->rx_req_cons_peek +
+ xenvif_max_required_rx_slots(vif);
mb(); /* request notification /then/ check & stop the queue */
- if (netbk_queue_full(netif))
+ if (xenvif_queue_full(vif))
netif_stop_queue(dev);
}
skb_queue_tail(&netbk->rx_queue, skb);
@@ -513,7 +513,7 @@ int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev)
return 0;
drop:
- netif->stats.tx_dropped++;
+ vif->stats.tx_dropped++;
dev_kfree_skb(skb);
return 0;
}
@@ -527,13 +527,13 @@ struct netrx_pending_operations {
grant_ref_t copy_gref;
};
-static struct netbk_rx_meta *get_next_rx_buffer(struct xen_netif *netif,
+static struct netbk_rx_meta *get_next_rx_buffer(struct xenvif *vif,
struct netrx_pending_operations *npo)
{
struct netbk_rx_meta *meta;
struct xen_netif_rx_request *req;
- req = RING_GET_REQUEST(&netif->rx, netif->rx.req_cons++);
+ req = RING_GET_REQUEST(&vif->rx, vif->rx.req_cons++);
meta = npo->meta + npo->meta_prod++;
meta->gso_size = 0;
@@ -550,7 +550,7 @@ static struct netbk_rx_meta *get_next_rx_buffer(struct xen_netif *netif,
* Set up the grant operations for this fragment. If it's a flipping
* interface, we also set up the unmap request from here.
*/
-static void netbk_gop_frag_copy(struct xen_netif *netif,
+static void netbk_gop_frag_copy(struct xenvif *vif,
struct netrx_pending_operations *npo,
struct page *page, unsigned long size,
unsigned long offset, int head)
@@ -580,7 +580,7 @@ static void netbk_gop_frag_copy(struct xen_netif *netif,
*/
BUG_ON(head);
- meta = get_next_rx_buffer(netif, npo);
+ meta = get_next_rx_buffer(vif, npo);
}
bytes = size;
@@ -595,7 +595,7 @@ static void netbk_gop_frag_copy(struct xen_netif *netif,
src_pend = &netbk->pending_tx_info[idx];
- copy_gop->source.domid = src_pend->netif->domid;
+ copy_gop->source.domid = src_pend->vif->domid;
copy_gop->source.u.ref = src_pend->req.gref;
copy_gop->flags |= GNTCOPY_source_gref;
} else {
@@ -604,7 +604,7 @@ static void netbk_gop_frag_copy(struct xen_netif *netif,
copy_gop->source.u.gmfn = virt_to_mfn(vaddr);
}
copy_gop->source.offset = offset;
- copy_gop->dest.domid = netif->domid;
+ copy_gop->dest.domid = vif->domid;
copy_gop->dest.offset = npo->copy_off;
copy_gop->dest.u.ref = npo->copy_gref;
@@ -634,7 +634,7 @@ static void netbk_gop_frag_copy(struct xen_netif *netif,
static int netbk_gop_skb(struct sk_buff *skb,
struct netrx_pending_operations *npo)
{
- struct xen_netif *netif = netdev_priv(skb->dev);
+ struct xenvif *vif = netdev_priv(skb->dev);
int nr_frags = skb_shinfo(skb)->nr_frags;
int i;
struct xen_netif_rx_request *req;
@@ -644,18 +644,18 @@ static int netbk_gop_skb(struct sk_buff *skb,
old_meta_prod = npo->meta_prod;
/* Set up a GSO prefix descriptor, if necessary */
- if (skb_shinfo(skb)->gso_size && netif->gso_prefix) {
- req = RING_GET_REQUEST(&netif->rx, netif->rx.req_cons++);
+ if (skb_shinfo(skb)->gso_size && vif->gso_prefix) {
+ req = RING_GET_REQUEST(&vif->rx, vif->rx.req_cons++);
meta = npo->meta + npo->meta_prod++;
meta->gso_size = skb_shinfo(skb)->gso_size;
meta->size = 0;
meta->id = req->id;
}
- req = RING_GET_REQUEST(&netif->rx, netif->rx.req_cons++);
+ req = RING_GET_REQUEST(&vif->rx, vif->rx.req_cons++);
meta = npo->meta + npo->meta_prod++;
- if (!netif->gso_prefix)
+ if (!vif->gso_prefix)
meta->gso_size = skb_shinfo(skb)->gso_size;
else
meta->gso_size = 0;
@@ -665,17 +665,16 @@ static int netbk_gop_skb(struct sk_buff *skb,
npo->copy_off = 0;
npo->copy_gref = req->gref;
- netbk_gop_frag_copy(netif,
- npo, virt_to_page(skb->data),
+ netbk_gop_frag_copy(vif, npo, virt_to_page(skb->data),
skb_headlen(skb),
offset_in_page(skb->data), 1);
/* Leave a gap for the GSO descriptor. */
- if (skb_shinfo(skb)->gso_size && !netif->gso_prefix)
- netif->rx.req_cons++;
+ if (skb_shinfo(skb)->gso_size && !vif->gso_prefix)
+ vif->rx.req_cons++;
for (i = 0; i < nr_frags; i++) {
- netbk_gop_frag_copy(netif, npo,
+ netbk_gop_frag_copy(vif, npo,
skb_shinfo(skb)->frags[i].page,
skb_shinfo(skb)->frags[i].size,
skb_shinfo(skb)->frags[i].page_offset,
@@ -710,7 +709,7 @@ static int netbk_check_gop(int nr_meta_slots, domid_t domid,
return status;
}
-static void netbk_add_frag_responses(struct xen_netif *netif, int status,
+static void netbk_add_frag_responses(struct xenvif *vif, int status,
struct netbk_rx_meta *meta,
int nr_meta_slots)
{
@@ -731,7 +730,7 @@ static void netbk_add_frag_responses(struct xen_netif *netif, int status,
flags = XEN_NETRXF_more_data;
offset = 0;
- make_rx_response(netif, meta[i].id, status, offset,
+ make_rx_response(vif, meta[i].id, status, offset,
meta[i].size, flags);
}
}
@@ -742,7 +741,7 @@ struct skb_cb_overlay {
static void xen_netbk_rx_action(unsigned long data)
{
- struct xen_netif *netif = NULL;
+ struct xenvif *vif = NULL;
struct xen_netbk *netbk = (struct xen_netbk *)data;
s8 status;
u16 irq, flags;
@@ -766,7 +765,7 @@ static void xen_netbk_rx_action(unsigned long data)
count = 0;
while ((skb = skb_dequeue(&netbk->rx_queue)) != NULL) {
- netif = netdev_priv(skb->dev);
+ vif = netdev_priv(skb->dev);
nr_frags = skb_shinfo(skb)->nr_frags;
sco = (struct skb_cb_overlay *)skb->cb;
@@ -794,11 +793,11 @@ static void xen_netbk_rx_action(unsigned long data)
while ((skb = __skb_dequeue(&rxq)) != NULL) {
sco = (struct skb_cb_overlay *)skb->cb;
- netif = netdev_priv(skb->dev);
+ vif = netdev_priv(skb->dev);
- if (netbk->meta[npo.meta_cons].gso_size && netif->gso_prefix) {
- resp = RING_GET_RESPONSE(&netif->rx,
- netif->rx.rsp_prod_pvt++);
+ if (netbk->meta[npo.meta_cons].gso_size && vif->gso_prefix) {
+ resp = RING_GET_RESPONSE(&vif->rx,
+ vif->rx.rsp_prod_pvt++);
resp->flags = XEN_NETRXF_gso_prefix | XEN_NETRXF_more_data;
@@ -811,11 +810,11 @@ static void xen_netbk_rx_action(unsigned long data)
}
- netif->stats.tx_bytes += skb->len;
- netif->stats.tx_packets++;
+ vif->stats.tx_bytes += skb->len;
+ vif->stats.tx_packets++;
status = netbk_check_gop(sco->meta_slots_used,
- netif->domid, &npo);
+ vif->domid, &npo);
if (sco->meta_slots_used == 1)
flags = 0;
@@ -829,16 +828,16 @@ static void xen_netbk_rx_action(unsigned long data)
flags |= XEN_NETRXF_data_validated;
offset = 0;
- resp = make_rx_response(netif, netbk->meta[npo.meta_cons].id,
+ resp = make_rx_response(vif, netbk->meta[npo.meta_cons].id,
status, offset,
netbk->meta[npo.meta_cons].size,
flags);
- if (netbk->meta[npo.meta_cons].gso_size && !netif->gso_prefix) {
+ if (netbk->meta[npo.meta_cons].gso_size && !vif->gso_prefix) {
struct xen_netif_extra_info *gso =
(struct xen_netif_extra_info *)
- RING_GET_RESPONSE(&netif->rx,
- netif->rx.rsp_prod_pvt++);
+ RING_GET_RESPONSE(&vif->rx,
+ vif->rx.rsp_prod_pvt++);
resp->flags |= XEN_NETRXF_extra_info;
@@ -851,23 +850,23 @@ static void xen_netbk_rx_action(unsigned long data)
gso->flags = 0;
}
- netbk_add_frag_responses(netif, status,
+ netbk_add_frag_responses(vif, status,
netbk->meta + npo.meta_cons + 1,
sco->meta_slots_used);
- RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&netif->rx, ret);
- irq = netif->irq;
+ RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&vif->rx, ret);
+ irq = vif->irq;
if (ret && !netbk->rx_notify[irq]) {
netbk->rx_notify[irq] = 1;
netbk->notify_list[notify_nr++] = irq;
}
- if (netif_queue_stopped(netif->dev) &&
- netif_schedulable(netif) &&
- !netbk_queue_full(netif))
- netif_wake_queue(netif->dev);
+ if (netif_queue_stopped(vif->dev) &&
+ xenvif_schedulable(vif) &&
+ !xenvif_queue_full(vif))
+ netif_wake_queue(vif->dev);
- netif_put(netif);
+ xenvif_put(vif);
npo.meta_cons += sco->meta_slots_used;
dev_kfree_skb(skb);
}
@@ -890,93 +889,92 @@ static void net_alarm(unsigned long data)
xen_netbk_bh_handler(netbk, 1);
}
-static void netbk_tx_pending_timeout(unsigned long data)
+static void xen_netbk_tx_pending_timeout(unsigned long data)
{
struct xen_netbk *netbk = (struct xen_netbk *)data;
xen_netbk_bh_handler(netbk, 0);
}
-struct net_device_stats *netif_be_get_stats(struct net_device *dev)
+struct net_device_stats *xenvif_get_stats(struct net_device *dev)
{
- struct xen_netif *netif = netdev_priv(dev);
- return &netif->stats;
+ struct xenvif *vif = netdev_priv(dev);
+ return &vif->stats;
}
-static int __on_net_schedule_list(struct xen_netif *netif)
+static int __on_net_schedule_list(struct xenvif *vif)
{
- return !list_empty(&netif->list);
+ return !list_empty(&vif->list);
}
/* Must be called with net_schedule_list_lock held */
-static void remove_from_net_schedule_list(struct xen_netif *netif)
+static void remove_from_net_schedule_list(struct xenvif *vif)
{
- if (likely(__on_net_schedule_list(netif))) {
- list_del_init(&netif->list);
- netif_put(netif);
+ if (likely(__on_net_schedule_list(vif))) {
+ list_del_init(&vif->list);
+ xenvif_put(vif);
}
}
-static struct xen_netif *poll_net_schedule_list(struct xen_netbk *netbk)
+static struct xenvif *poll_net_schedule_list(struct xen_netbk *netbk)
{
- struct xen_netif *netif = NULL;
+ struct xenvif *vif = NULL;
spin_lock_irq(&netbk->net_schedule_list_lock);
if (list_empty(&netbk->net_schedule_list))
goto out;
- netif = list_first_entry(&netbk->net_schedule_list,
- struct xen_netif, list);
- if (!netif)
+ vif = list_first_entry(&netbk->net_schedule_list,
+ struct xenvif, list);
+ if (!vif)
goto out;
- netif_get(netif);
+ xenvif_get(vif);
- remove_from_net_schedule_list(netif);
+ remove_from_net_schedule_list(vif);
out:
spin_unlock_irq(&netbk->net_schedule_list_lock);
- return netif;
+ return vif;
}
-static void add_to_net_schedule_list_tail(struct xen_netif *netif)
+static void add_to_net_schedule_list_tail(struct xenvif *vif)
{
unsigned long flags;
- struct xen_netbk *netbk = &xen_netbk[netif->group];
- if (__on_net_schedule_list(netif))
+ struct xen_netbk *netbk = &xen_netbk[vif->group];
+ if (__on_net_schedule_list(vif))
return;
spin_lock_irqsave(&netbk->net_schedule_list_lock, flags);
- if (!__on_net_schedule_list(netif) &&
- likely(netif_schedulable(netif))) {
- list_add_tail(&netif->list, &netbk->net_schedule_list);
- netif_get(netif);
+ if (!__on_net_schedule_list(vif) &&
+ likely(xenvif_schedulable(vif))) {
+ list_add_tail(&vif->list, &netbk->net_schedule_list);
+ xenvif_get(vif);
}
spin_unlock_irqrestore(&netbk->net_schedule_list_lock, flags);
}
-void netif_schedule_work(struct xen_netif *netif)
+void xenvif_schedule_work(struct xenvif *vif)
{
- struct xen_netbk *netbk = &xen_netbk[netif->group];
+ struct xen_netbk *netbk = &xen_netbk[vif->group];
int more_to_do;
- RING_FINAL_CHECK_FOR_REQUESTS(&netif->tx, more_to_do);
+ RING_FINAL_CHECK_FOR_REQUESTS(&vif->tx, more_to_do);
if (more_to_do) {
- add_to_net_schedule_list_tail(netif);
+ add_to_net_schedule_list_tail(vif);
maybe_schedule_tx_action(netbk);
}
}
-void netif_deschedule_work(struct xen_netif *netif)
+void xenvif_deschedule_work(struct xenvif *vif)
{
- struct xen_netbk *netbk = &xen_netbk[netif->group];
+ struct xen_netbk *netbk = &xen_netbk[vif->group];
spin_lock_irq(&netbk->net_schedule_list_lock);
- remove_from_net_schedule_list(netif);
+ remove_from_net_schedule_list(vif);
spin_unlock_irq(&netbk->net_schedule_list_lock);
}
-
-static void tx_add_credit(struct xen_netif *netif)
+static void tx_add_credit(struct xenvif *vif)
{
unsigned long max_burst, max_credit;
@@ -984,23 +982,23 @@ static void tx_add_credit(struct xen_netif *netif)
* Allow a burst big enough to transmit a jumbo packet of up to 128kB.
* Otherwise the interface can seize up due to insufficient credit.
*/
- max_burst = RING_GET_REQUEST(&netif->tx, netif->tx.req_cons)->size;
+ max_burst = RING_GET_REQUEST(&vif->tx, vif->tx.req_cons)->size;
max_burst = min(max_burst, 131072UL);
- max_burst = max(max_burst, netif->credit_bytes);
+ max_burst = max(max_burst, vif->credit_bytes);
/* Take care that adding a new chunk of credit doesn't wrap to zero. */
- max_credit = netif->remaining_credit + netif->credit_bytes;
- if (max_credit < netif->remaining_credit)
+ max_credit = vif->remaining_credit + vif->credit_bytes;
+ if (max_credit < vif->remaining_credit)
max_credit = ULONG_MAX; /* wrapped: clamp to ULONG_MAX */
- netif->remaining_credit = min(max_credit, max_burst);
+ vif->remaining_credit = min(max_credit, max_burst);
}
static void tx_credit_callback(unsigned long data)
{
- struct xen_netif *netif = (struct xen_netif *)data;
- tx_add_credit(netif);
- netif_schedule_work(netif);
+ struct xenvif *vif = (struct xenvif *)data;
+ tx_add_credit(vif);
+ xenvif_schedule_work(vif);
}
static inline int copy_pending_req(struct xen_netbk *netbk,
@@ -1011,7 +1009,7 @@ static inline int copy_pending_req(struct xen_netbk *netbk,
&netbk->mmap_pages[pending_idx]);
}
-static inline void net_tx_action_dealloc(struct xen_netbk *netbk)
+static inline void xen_netbk_tx_action_dealloc(struct xen_netbk *netbk)
{
struct netbk_tx_pending_inuse *inuse, *n;
struct gnttab_unmap_grant_ref *gop;
@@ -1110,34 +1108,34 @@ static inline void net_tx_action_dealloc(struct xen_netbk *netbk)
index = pending_index(netbk->pending_prod++);
netbk->pending_ring[index] = pending_idx;
- netif_put(netif);
+ xenvif_put(vif);
list_del_init(&inuse->list);
}
}
-static void netbk_tx_err(struct xen_netif *netif,
- struct xen_netif_tx_request *txp, RING_IDX end)
+static void netbk_tx_err(struct xenvif *vif,
+ struct xen_netif_tx_request *txp, RING_IDX end)
{
- RING_IDX cons = netif->tx.req_cons;
+ RING_IDX cons = vif->tx.req_cons;
do {
- make_tx_response(netif, txp, XEN_NETIF_RSP_ERROR);
+ make_tx_response(vif, txp, XEN_NETIF_RSP_ERROR);
if (cons >= end)
break;
- txp = RING_GET_REQUEST(&netif->tx, cons++);
+ txp = RING_GET_REQUEST(&vif->tx, cons++);
} while (1);
- netif->tx.req_cons = cons;
- netif_schedule_work(netif);
- netif_put(netif);
+ vif->tx.req_cons = cons;
+ xenvif_schedule_work(vif);
+ xenvif_put(vif);
}
-static int netbk_count_requests(struct xen_netif *netif,
+static int netbk_count_requests(struct xenvif *vif,
struct xen_netif_tx_request *first,
struct xen_netif_tx_request *txp,
int work_to_do)
{
- RING_IDX cons = netif->tx.req_cons;
+ RING_IDX cons = vif->tx.req_cons;
int frags = 0;
if (!(first->flags & XEN_NETTXF_more_data))
@@ -1154,7 +1152,7 @@ static int netbk_count_requests(struct xen_netif *netif,
return -frags;
}
- memcpy(txp, RING_GET_REQUEST(&netif->tx, cons + frags),
+ memcpy(txp, RING_GET_REQUEST(&vif->tx, cons + frags),
sizeof(*txp));
if (txp->size > first->size) {
pr_debug("Frags galore\n");
@@ -1175,7 +1173,7 @@ static int netbk_count_requests(struct xen_netif *netif,
}
static struct gnttab_map_grant_ref *xen_netbk_get_requests(struct xen_netbk *netbk,
- struct xen_netif *netif,
+ struct xenvif *vif,
struct sk_buff *skb,
struct xen_netif_tx_request *txp,
struct gnttab_map_grant_ref *mop)
@@ -1201,8 +1199,8 @@ static struct gnttab_map_grant_ref *xen_netbk_get_requests(struct xen_netbk *net
txp->gref, netif->domid);
memcpy(&pending_tx_info[pending_idx].req, txp, sizeof(*txp));
- netif_get(netif);
- pending_tx_info[pending_idx].netif = netif;
+ xenvif_get(vif);
+ pending_tx_info[pending_idx].vif = vif;
frags[i].page = (void *)pending_idx;
}
@@ -1216,7 +1214,7 @@ static int xen_netbk_tx_check_mop(struct xen_netbk *netbk,
struct gnttab_map_grant_ref *mop = *mopp;
int pending_idx = *((u16 *)skb->data);
struct pending_tx_info *pending_tx_info = netbk->pending_tx_info;
- struct xen_netif *netif = pending_tx_info[pending_idx].netif;
+ struct xenvif *vif = pending_tx_info[pending_idx].vif;
struct xen_netif_tx_request *txp;
struct skb_shared_info *shinfo = skb_shinfo(skb);
int nr_frags = shinfo->nr_frags;
@@ -1228,9 +1226,9 @@ static int xen_netbk_tx_check_mop(struct xen_netbk *netbk,
pending_ring_idx_t index;
index = pending_index(netbk->pending_prod++);
txp = &pending_tx_info[pending_idx].req;
- make_tx_response(netif, txp, XEN_NETIF_RSP_ERROR);
+ make_tx_response(vif, txp, XEN_NETIF_RSP_ERROR);
netbk->pending_ring[index] = pending_idx;
- netif_put(netif);
+ xenvif_put(vif);
} else {
set_phys_to_machine(
__pa(idx_to_kaddr(netbk, pending_idx)) >> PAGE_SHIFT,
@@ -1264,10 +1262,10 @@ static int xen_netbk_tx_check_mop(struct xen_netbk *netbk,
/* Error on this fragment: respond to client with an error. */
txp = &netbk->pending_tx_info[pending_idx].req;
- make_tx_response(netif, txp, XEN_NETIF_RSP_ERROR);
+ make_tx_response(vif, txp, XEN_NETIF_RSP_ERROR);
index = pending_index(netbk->pending_prod++);
netbk->pending_ring[index] = pending_idx;
- netif_put(netif);
+ xenvif_put(vif);
/* Not the first error? Preceding frags already invalidated. */
if (err)
@@ -1317,12 +1315,12 @@ static void xen_netbk_fill_frags(struct xen_netbk *netbk, struct sk_buff *skb)
}
}
-int netbk_get_extras(struct xen_netif *netif,
+int netbk_get_extras(struct xenvif *vif,
struct xen_netif_extra_info *extras,
int work_to_do)
{
struct xen_netif_extra_info extra;
- RING_IDX cons = netif->tx.req_cons;
+ RING_IDX cons = vif->tx.req_cons;
do {
if (unlikely(work_to_do-- <= 0)) {
@@ -1330,17 +1328,17 @@ int netbk_get_extras(struct xen_netif *netif,
return -EBADR;
}
- memcpy(&extra, RING_GET_REQUEST(&netif->tx, cons),
+ memcpy(&extra, RING_GET_REQUEST(&vif->tx, cons),
sizeof(extra));
if (unlikely(!extra.type ||
extra.type >= XEN_NETIF_EXTRA_TYPE_MAX)) {
- netif->tx.req_cons = ++cons;
+ vif->tx.req_cons = ++cons;
pr_debug("Invalid extra type: %d\n", extra.type);
return -EINVAL;
}
memcpy(&extras[extra.type - 1], &extra, sizeof(extra));
- netif->tx.req_cons = ++cons;
+ vif->tx.req_cons = ++cons;
} while (extra.flags & XEN_NETIF_EXTRA_FLAG_MORE);
return work_to_do;
@@ -1370,7 +1368,7 @@ static int netbk_set_skb_gso(struct sk_buff *skb,
return 0;
}
-static int checksum_setup(struct xen_netif *netif, struct sk_buff *skb)
+static int checksum_setup(struct xenvif *vif, struct sk_buff *skb)
{
struct iphdr *iph;
unsigned char *th;
@@ -1384,7 +1382,7 @@ static int checksum_setup(struct xen_netif *netif, struct sk_buff *skb)
* recalculate the partial checksum.
*/
if (skb->ip_summed != CHECKSUM_PARTIAL && skb_is_gso(skb)) {
- netif->rx_gso_checksum_fixup++;
+ vif->rx_gso_checksum_fixup++;
skb->ip_summed = CHECKSUM_PARTIAL;
recalculate_partial_csum = 1;
}
@@ -1440,30 +1438,30 @@ out:
return err;
}
-static bool tx_credit_exceeded(struct xen_netif *netif, unsigned size)
+static bool tx_credit_exceeded(struct xenvif *vif, unsigned size)
{
unsigned long now = jiffies;
unsigned long next_credit =
- netif->credit_timeout.expires +
- msecs_to_jiffies(netif->credit_usec / 1000);
+ vif->credit_timeout.expires +
+ msecs_to_jiffies(vif->credit_usec / 1000);
/* Timer could already be pending in rare cases. */
- if (timer_pending(&netif->credit_timeout))
+ if (timer_pending(&vif->credit_timeout))
return true;
/* Passed the point where we can replenish credit? */
if (time_after_eq(now, next_credit)) {
- netif->credit_timeout.expires = now;
- tx_add_credit(netif);
+ vif->credit_timeout.expires = now;
+ tx_add_credit(vif);
}
/* Still too big to send right now? Set a callback. */
- if (size > netif->remaining_credit) {
- netif->credit_timeout.data =
- (unsigned long)netif;
- netif->credit_timeout.function =
+ if (size > vif->remaining_credit) {
+ vif->credit_timeout.data =
+ (unsigned long)vif;
+ vif->credit_timeout.function =
tx_credit_callback;
- mod_timer(&netif->credit_timeout,
+ mod_timer(&vif->credit_timeout,
next_credit);
return true;
@@ -1481,7 +1479,7 @@ static unsigned xen_netbk_tx_build_mops(struct xen_netbk *netbk)
mop = netbk->tx_map_ops;
while (((nr_pending_reqs(netbk) + MAX_SKB_FRAGS) < MAX_PENDING_REQS) &&
!list_empty(&netbk->net_schedule_list)) {
- struct xen_netif *netif;
+ struct xenvif *vif;
struct xen_netif_tx_request txreq;
struct xen_netif_tx_request txfrags[MAX_SKB_FRAGS];
struct xen_netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX-1];
@@ -1492,53 +1490,53 @@ static unsigned xen_netbk_tx_build_mops(struct xen_netbk *netbk)
pending_ring_idx_t index;
/* Get a netif from the list with work to do. */
- netif = poll_net_schedule_list(netbk);
- if (!netif)
+ vif = poll_net_schedule_list(netbk);
+ if (!vif)
continue;
- RING_FINAL_CHECK_FOR_REQUESTS(&netif->tx, work_to_do);
+ RING_FINAL_CHECK_FOR_REQUESTS(&vif->tx, work_to_do);
if (!work_to_do) {
- netif_put(netif);
+ xenvif_put(vif);
continue;
}
- idx = netif->tx.req_cons;
+ idx = vif->tx.req_cons;
rmb(); /* Ensure that we see the request before we copy it. */
- memcpy(&txreq, RING_GET_REQUEST(&netif->tx, idx), sizeof(txreq));
+ memcpy(&txreq, RING_GET_REQUEST(&vif->tx, idx), sizeof(txreq));
/* Credit-based scheduling. */
- if (txreq.size > netif->remaining_credit &&
- tx_credit_exceeded(netif, txreq.size)) {
- netif_put(netif);
+ if (txreq.size > vif->remaining_credit &&
+ tx_credit_exceeded(vif, txreq.size)) {
+ xenvif_put(vif);
continue;
}
- netif->remaining_credit -= txreq.size;
+ vif->remaining_credit -= txreq.size;
work_to_do--;
- netif->tx.req_cons = ++idx;
+ vif->tx.req_cons = ++idx;
memset(extras, 0, sizeof(extras));
if (txreq.flags & XEN_NETTXF_extra_info) {
- work_to_do = netbk_get_extras(netif, extras,
+ work_to_do = netbk_get_extras(vif, extras,
work_to_do);
- idx = netif->tx.req_cons;
+ idx = vif->tx.req_cons;
if (unlikely(work_to_do < 0)) {
- netbk_tx_err(netif, &txreq, idx);
+ netbk_tx_err(vif, &txreq, idx);
continue;
}
}
- ret = netbk_count_requests(netif, &txreq, txfrags, work_to_do);
+ ret = netbk_count_requests(vif, &txreq, txfrags, work_to_do);
if (unlikely(ret < 0)) {
- netbk_tx_err(netif, &txreq, idx - ret);
+ netbk_tx_err(vif, &txreq, idx - ret);
continue;
}
idx += ret;
if (unlikely(txreq.size < ETH_HLEN)) {
pr_debug("Bad packet size: %d\n", txreq.size);
- netbk_tx_err(netif, &txreq, idx);
+ netbk_tx_err(vif, &txreq, idx);
continue;
}
@@ -1547,7 +1545,7 @@ static unsigned xen_netbk_tx_build_mops(struct xen_netbk *netbk)
pr_debug("txreq.offset: %x, size: %u, end: %lu\n",
txreq.offset, txreq.size,
(txreq.offset&~PAGE_MASK) + txreq.size);
- netbk_tx_err(netif, &txreq, idx);
+ netbk_tx_err(vif, &txreq, idx);
continue;
}
@@ -1562,7 +1560,7 @@ static unsigned xen_netbk_tx_build_mops(struct xen_netbk *netbk)
GFP_ATOMIC | __GFP_NOWARN);
if (unlikely(skb == NULL)) {
pr_debug("Can't allocate a skb in start_xmit.\n");
- netbk_tx_err(netif, &txreq, idx);
+ netbk_tx_err(vif, &txreq, idx);
break;
}
@@ -1575,7 +1573,7 @@ static unsigned xen_netbk_tx_build_mops(struct xen_netbk *netbk)
if (netbk_set_skb_gso(skb, gso)) {
kfree_skb(skb);
- netbk_tx_err(netif, &txreq, idx);
+ netbk_tx_err(vif, &txreq, idx);
continue;
}
}
@@ -1587,7 +1585,7 @@ static unsigned xen_netbk_tx_build_mops(struct xen_netbk *netbk)
memcpy(&netbk->pending_tx_info[pending_idx].req,
&txreq, sizeof(txreq));
- netbk->pending_tx_info[pending_idx].netif = netif;
+ netbk->pending_tx_info[pending_idx].vif = vif;
*((u16 *)skb->data) = pending_idx;
__skb_put(skb, data_len);
@@ -1608,8 +1606,8 @@ static unsigned xen_netbk_tx_build_mops(struct xen_netbk *netbk)
mop = xen_netbk_get_requests(netbk, netif, skb, txfrags, mop);
- netif->tx.req_cons = idx;
- netif_schedule_work(netif);
+ vif->tx.req_cons = idx;
+ xenvif_schedule_work(vif);
if ((mop - netbk->tx_map_ops) >= ARRAY_SIZE(netbk->tx_map_ops))
break;
@@ -1626,12 +1624,12 @@ static void xen_netbk_tx_submit(struct xen_netbk *netbk)
mop = netbk->tx_map_ops;
while ((skb = __skb_dequeue(&netbk->tx_queue)) != NULL) {
struct xen_netif_tx_request *txp;
- struct xen_netif *netif;
+ struct xenvif *vif;
u16 pending_idx;
unsigned data_len;
pending_idx = *((u16 *)skb->data);
- netif = netbk->pending_tx_info[pending_idx].netif;
+ vif = netbk->pending_tx_info[pending_idx].vif;
txp = &netbk->pending_tx_info[pending_idx].req;
/* Check the remap error code. */
@@ -1672,10 +1670,10 @@ static void xen_netbk_tx_submit(struct xen_netbk *netbk)
__pskb_pull_tail(skb, target - skb_headlen(skb));
}
- skb->dev = netif->dev;
+ skb->dev = vif->dev;
skb->protocol = eth_type_trans(skb, skb->dev);
- if (checksum_setup(netif, skb)) {
+ if (checksum_setup(vif, skb)) {
pr_debug("Can't setup checksum in net_tx_action\n");
kfree_skb(skb);
continue;
@@ -1688,11 +1686,11 @@ static void xen_netbk_tx_submit(struct xen_netbk *netbk)
continue;
}
- netif->stats.rx_bytes += skb->len;
- netif->stats.rx_packets++;
+ vif->stats.rx_bytes += skb->len;
+ vif->stats.rx_packets++;
netif_rx_ni(skb);
- netif->dev->last_rx = jiffies;
+ vif->dev->last_rx = jiffies;
}
}
@@ -1755,57 +1753,57 @@ static void netif_page_release(struct page *page, unsigned int order)
xen_netbk_idx_release(&xen_netbk[group], idx);
}
-irqreturn_t netif_be_int(int irq, void *dev_id)
+irqreturn_t xenvif_interrupt(int irq, void *dev_id)
{
- struct xen_netif *netif = dev_id;
+ struct xenvif *vif = dev_id;
struct xen_netbk *netbk;
- if (netif->group == -1)
+ if (vif->group == -1)
return IRQ_NONE;
- netbk = &xen_netbk[netif->group];
+ netbk = &xen_netbk[vif->group];
- add_to_net_schedule_list_tail(netif);
+ add_to_net_schedule_list_tail(vif);
maybe_schedule_tx_action(netbk);
- if (netif_schedulable(netif) && !netbk_queue_full(netif))
- netif_wake_queue(netif->dev);
+ if (xenvif_schedulable(vif) && !xenvif_queue_full(vif))
+ netif_wake_queue(vif->dev);
return IRQ_HANDLED;
}
-static void make_tx_response(struct xen_netif *netif,
+static void make_tx_response(struct xenvif *vif,
struct xen_netif_tx_request *txp,
s8 st)
{
- RING_IDX i = netif->tx.rsp_prod_pvt;
+ RING_IDX i = vif->tx.rsp_prod_pvt;
struct xen_netif_tx_response *resp;
int notify;
- resp = RING_GET_RESPONSE(&netif->tx, i);
+ resp = RING_GET_RESPONSE(&vif->tx, i);
resp->id = txp->id;
resp->status = st;
if (txp->flags & XEN_NETTXF_extra_info)
- RING_GET_RESPONSE(&netif->tx, ++i)->status = XEN_NETIF_RSP_NULL;
+ RING_GET_RESPONSE(&vif->tx, ++i)->status = XEN_NETIF_RSP_NULL;
- netif->tx.rsp_prod_pvt = ++i;
- RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&netif->tx, notify);
+ vif->tx.rsp_prod_pvt = ++i;
+ RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&vif->tx, notify);
if (notify)
- notify_remote_via_irq(netif->irq);
+ notify_remote_via_irq(vif->irq);
}
-static struct xen_netif_rx_response *make_rx_response(struct xen_netif *netif,
+static struct xen_netif_rx_response *make_rx_response(struct xenvif *vif,
u16 id,
s8 st,
u16 offset,
u16 size,
u16 flags)
{
- RING_IDX i = netif->rx.rsp_prod_pvt;
+ RING_IDX i = vif->rx.rsp_prod_pvt;
struct xen_netif_rx_response *resp;
- resp = RING_GET_RESPONSE(&netif->rx, i);
+ resp = RING_GET_RESPONSE(&vif->rx, i);
resp->offset = offset;
resp->flags = flags;
resp->id = id;
@@ -1813,7 +1811,7 @@ static struct xen_netif_rx_response *make_rx_response(struct xen_netif *netif,
if (st < 0)
resp->status = (s16)st;
- netif->rx.rsp_prod_pvt = ++i;
+ vif->rx.rsp_prod_pvt = ++i;
return resp;
}
@@ -1964,7 +1962,7 @@ static int __init netback_init(void)
netbk_copy_skb_mode = NETBK_DELAYED_COPY_SKB;
}
- rc = netif_xenbus_init();
+ rc = xenvif_xenbus_init();
if (rc)
goto failed_init;
diff --git a/drivers/net/xen-netback/xenbus.c b/drivers/net/xen-netback/xenbus.c
index 867dc25..a6ad259 100644
--- a/drivers/net/xen-netback/xenbus.c
+++ b/drivers/net/xen-netback/xenbus.c
@@ -23,7 +23,7 @@
static int connect_rings(struct backend_info *);
static void connect(struct backend_info *);
-static void backend_create_netif(struct backend_info *be);
+static void backend_create_xenvif(struct backend_info *be);
static void unregister_hotplug_status_watch(struct backend_info *be);
static int netback_remove(struct xenbus_device *dev)
@@ -31,11 +31,11 @@ static int netback_remove(struct xenbus_device *dev)
struct backend_info *be = dev_get_drvdata(&dev->dev);
unregister_hotplug_status_watch(be);
- if (be->netif) {
+ if (be->vif) {
kobject_uevent(&dev->dev.kobj, KOBJ_OFFLINE);
xenbus_rm(XBT_NIL, dev->nodename, "hotplug-status");
- netif_disconnect(be->netif);
- be->netif = NULL;
+ xenvif_disconnect(be->vif);
+ be->vif = NULL;
}
kfree(be);
dev_set_drvdata(&dev->dev, NULL);
@@ -121,7 +121,7 @@ static int netback_probe(struct xenbus_device *dev,
goto fail;
/* This kicks hotplug scripts, so do it immediately. */
- backend_create_netif(be);
+ backend_create_xenvif(be);
return 0;
@@ -159,20 +159,20 @@ static int netback_uevent(struct xenbus_device *xdev,
kfree(val);
}
- if (!be || !be->netif)
+ if (!be || !be->vif)
return 0;
- return add_uevent_var(env, "vif=%s", be->netif->dev->name);
+ return add_uevent_var(env, "vif=%s", be->vif->dev->name);
}
-static void backend_create_netif(struct backend_info *be)
+static void backend_create_xenvif(struct backend_info *be)
{
int err;
long handle;
struct xenbus_device *dev = be->dev;
- if (be->netif != NULL)
+ if (be->vif != NULL)
return;
err = xenbus_scanf(XBT_NIL, dev->nodename, "handle", "%li", &handle);
@@ -181,10 +181,10 @@ static void backend_create_netif(struct backend_info *be)
return;
}
- be->netif = netif_alloc(&dev->dev, dev->otherend_id, handle);
- if (IS_ERR(be->netif)) {
- err = PTR_ERR(be->netif);
- be->netif = NULL;
+ be->vif = xenvif_alloc(&dev->dev, dev->otherend_id, handle);
+ if (IS_ERR(be->vif)) {
+ err = PTR_ERR(be->vif);
+ be->vif = NULL;
xenbus_dev_fatal(dev, err, "creating interface");
return;
}
@@ -197,10 +197,10 @@ static void disconnect_backend(struct xenbus_device *dev)
{
struct backend_info *be = dev_get_drvdata(&dev->dev);
- if (be->netif) {
+ if (be->vif) {
xenbus_rm(XBT_NIL, dev->nodename, "hotplug-status");
- netif_disconnect(be->netif);
- be->netif = NULL;
+ xenvif_disconnect(be->vif);
+ be->vif = NULL;
}
}
@@ -231,13 +231,13 @@ static void frontend_changed(struct xenbus_device *dev,
case XenbusStateConnected:
if (dev->state == XenbusStateConnected)
break;
- backend_create_netif(be);
- if (be->netif)
+ backend_create_xenvif(be);
+ if (be->vif)
connect(be);
break;
case XenbusStateClosing:
- if (be->netif)
+ if (be->vif)
kobject_uevent(&dev->dev.kobj, KOBJ_OFFLINE);
disconnect_backend(dev);
xenbus_switch_state(dev, XenbusStateClosing);
@@ -357,15 +357,15 @@ static void connect(struct backend_info *be)
if (err)
return;
- err = xen_net_read_mac(dev, be->netif->fe_dev_addr);
+ err = xen_net_read_mac(dev, be->vif->fe_dev_addr);
if (err) {
xenbus_dev_fatal(dev, err, "parsing %s/mac", dev->nodename);
return;
}
- xen_net_read_rate(dev, &be->netif->credit_bytes,
- &be->netif->credit_usec);
- be->netif->remaining_credit = be->netif->credit_bytes;
+ xen_net_read_rate(dev, &be->vif->credit_bytes,
+ &be->vif->credit_usec);
+ be->vif->remaining_credit = be->vif->credit_bytes;
unregister_hotplug_status_watch(be);
err = xenbus_watch_pathfmt(dev, &be->hotplug_status_watch,
@@ -378,13 +378,13 @@ static void connect(struct backend_info *be)
be->have_hotplug_status_watch = 1;
}
- netif_wake_queue(be->netif->dev);
+ netif_wake_queue(be->vif->dev);
}
static int connect_rings(struct backend_info *be)
{
- struct xen_netif *netif = be->netif;
+ struct xenvif *vif = be->vif;
struct xenbus_device *dev = be->dev;
unsigned long tx_ring_ref, rx_ring_ref;
unsigned int evtchn, rx_copy;
@@ -416,42 +416,42 @@ static int connect_rings(struct backend_info *be)
if (!rx_copy)
return -EOPNOTSUPP;
- if (netif->dev->tx_queue_len != 0) {
+ if (vif->dev->tx_queue_len != 0) {
if (xenbus_scanf(XBT_NIL, dev->otherend,
"feature-rx-notify", "%d", &val) < 0)
val = 0;
if (val)
- netif->can_queue = 1;
+ vif->can_queue = 1;
else
/* Must be non-zero for pfifo_fast to work. */
- netif->dev->tx_queue_len = 1;
+ vif->dev->tx_queue_len = 1;
}
if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-sg",
"%d", &val) < 0)
val = 0;
- netif->can_sg = !!val;
+ vif->can_sg = !!val;
if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-gso-tcpv4",
"%d", &val) < 0)
val = 0;
- netif->gso = !!val;
+ vif->gso = !!val;
if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-gso-tcpv4-prefix",
"%d", &val) < 0)
val = 0;
- netif->gso_prefix = !!val;
+ vif->gso_prefix = !!val;
if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-no-csum-offload",
"%d", &val) < 0)
val = 0;
- netif->csum = !val;
+ vif->csum = !val;
/* Set dev->features */
- netif_set_features(netif);
+ xenvif_set_features(vif);
/* Map the shared frame, irq etc. */
- err = netif_map(netif, tx_ring_ref, rx_ring_ref, evtchn);
+ err = xenvif_connect(vif, tx_ring_ref, rx_ring_ref, evtchn);
if (err) {
xenbus_dev_fatal(dev, err,
"mapping shared-frames %lu/%lu port %u",
@@ -481,9 +481,7 @@ static struct xenbus_driver netback = {
.otherend_changed = frontend_changed,
};
-
-int netif_xenbus_init(void)
+int xenvif_xenbus_init(void)
{
- printk(KERN_CRIT "registering netback\n");
return xenbus_register_backend(&netback);
}
--
1.7.4
From 008474c5704c28efea54927ef735ed5904eb2e2b Mon Sep 17 00:00:00 2001
From: Ian Campbell <ian.campbell@citrix.com>
Date: Fri, 21 Jan 2011 09:42:35 +0000
Subject: [PATCH 168/197] xen: netback: add reference from xenvif to xen_netbk
Rather than storing the group id, simply store a pointer (opaque to xenvif).
Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
---
drivers/net/xen-netback/common.h | 6 +++++-
drivers/net/xen-netback/interface.c | 2 +-
drivers/net/xen-netback/netback.c | 26 ++++++++++++++------------
3 files changed, 20 insertions(+), 14 deletions(-)
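The shape of the change is easy to see outside the kernel. Below is a minimal user-space C sketch of the idea (the structs and the xen_netbk[] array here are stand-ins, not the real kernel definitions): a lookup through a stored group index becomes a direct, opaque pointer, with NULL taking over from the old -1 sentinel.

/*
 * Stand-in sketch, not kernel code: before, the vif remembered an index
 * into a global xen_netbk[] array and every caller re-derived the pool
 * from it; after, it carries an opaque pointer and NULL means "detached".
 */
#include <stdio.h>

struct xen_netbk { int netfront_count; };	/* placeholder pool */

struct xenvif_old { int group; };		/* before: index, -1 = none */
struct xenvif_new { struct xen_netbk *netbk; };	/* after: pointer, NULL = none */

static struct xen_netbk xen_netbk[4];

int main(void)
{
	struct xenvif_old old_vif = { .group = 2 };
	struct xenvif_new new_vif = { .netbk = &xen_netbk[2] };

	struct xen_netbk *a = &xen_netbk[old_vif.group];	/* before */
	struct xen_netbk *b = new_vif.netbk;			/* after  */

	printf("same pool: %s\n", a == b ? "yes" : "no");
	return 0;
}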
diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h
index f6da94b..ebe93fb 100644
--- a/drivers/net/xen-netback/common.h
+++ b/drivers/net/xen-netback/common.h
@@ -46,12 +46,16 @@
#include <xen/grant_table.h>
#include <xen/xenbus.h>
+struct xen_netbk;
+
struct xenvif {
/* Unique identifier for this interface. */
domid_t domid;
- int group;
unsigned int handle;
+ /* */
+ struct xen_netbk *netbk;
+
u8 fe_dev_addr[6];
/* Physical parameters of the comms window. */
diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c
index a065173..cfbb3cc 100644
--- a/drivers/net/xen-netback/interface.c
+++ b/drivers/net/xen-netback/interface.c
@@ -235,8 +235,8 @@ struct xenvif *xenvif_alloc(struct device *parent, domid_t domid,
vif = netdev_priv(dev);
memset(vif, 0, sizeof(*vif));
vif->domid = domid;
- vif->group = -1;
vif->handle = handle;
+ vif->netbk = NULL;
vif->can_sg = 1;
vif->csum = 1;
atomic_set(&vif->refcnt, 1);
diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c
index f2f9c6f..4f04d2d 100644
--- a/drivers/net/xen-netback/netback.c
+++ b/drivers/net/xen-netback/netback.c
@@ -145,6 +145,8 @@ void xen_netbk_add_xenvif(struct xenvif *vif)
int i;
int min_netfront_count;
int min_group = 0;
+ struct xen_netbk *netbk;
+
min_netfront_count = atomic_read(&xen_netbk[0].netfront_count);
for (i = 0; i < xen_netbk_group_nr; i++) {
int netfront_count = atomic_read(&xen_netbk[i].netfront_count);
@@ -154,13 +156,15 @@ void xen_netbk_add_xenvif(struct xenvif *vif)
}
}
- vif->group = min_group;
- atomic_inc(&xen_netbk[vif->group].netfront_count);
+ netbk = &xen_netbk[min_group];
+
+ vif->netbk = netbk;
+ atomic_inc(&netbk->netfront_count);
}
void xen_netbk_remove_xenvif(struct xenvif *vif)
{
- atomic_dec(&xen_netbk[vif->group].netfront_count);
+ atomic_dec(&vif->netbk->netfront_count);
}
static void xen_netbk_idx_release(struct xen_netbk *netbk, u16 pending_idx);
@@ -471,10 +475,10 @@ int xenvif_start_xmit(struct sk_buff *skb, struct net_device *dev)
BUG_ON(skb->dev != dev);
- if (vif->group == -1)
+ if (vif->netbk == NULL)
goto drop;
- netbk = &xen_netbk[vif->group];
+ netbk = vif->netbk;
/* Drop the packet if the target domain has no receive buffers. */
if (unlikely(!xenvif_schedulable(vif) || xenvif_queue_full(vif)))
@@ -940,7 +944,7 @@ static void add_to_net_schedule_list_tail(struct xenvif *vif)
{
unsigned long flags;
- struct xen_netbk *netbk = &xen_netbk[vif->group];
+ struct xen_netbk *netbk = vif->netbk;
if (__on_net_schedule_list(vif))
return;
@@ -955,7 +959,7 @@ static void add_to_net_schedule_list_tail(struct xenvif *vif)
void xenvif_schedule_work(struct xenvif *vif)
{
- struct xen_netbk *netbk = &xen_netbk[vif->group];
+ struct xen_netbk *netbk = vif->netbk;
int more_to_do;
RING_FINAL_CHECK_FOR_REQUESTS(&vif->tx, more_to_do);
@@ -968,7 +972,7 @@ void xenvif_schedule_work(struct xenvif *vif)
void xenvif_deschedule_work(struct xenvif *vif)
{
- struct xen_netbk *netbk = &xen_netbk[vif->group];
+ struct xen_netbk *netbk = vif->netbk;
spin_lock_irq(&netbk->net_schedule_list_lock);
remove_from_net_schedule_list(vif);
spin_unlock_irq(&netbk->net_schedule_list_lock);
@@ -1756,13 +1760,11 @@ static void netif_page_release(struct page *page, unsigned int order)
irqreturn_t xenvif_interrupt(int irq, void *dev_id)
{
struct xenvif *vif = dev_id;
- struct xen_netbk *netbk;
+ struct xen_netbk *netbk = vif->netbk;
- if (vif->group == -1)
+ if (netbk == NULL)
return IRQ_NONE;
- netbk = &xen_netbk[vif->group];
-
add_to_net_schedule_list_tail(vif);
maybe_schedule_tx_action(netbk);
--
1.7.4
From 658641318f5e98e948776240af98ed86a55a22ff Mon Sep 17 00:00:00 2001
From: Ian Campbell <ian.campbell@citrix.com>
Date: Fri, 21 Jan 2011 10:45:25 +0000
Subject: [PATCH 169/197] xen: netback: refactor to separate network device from worker pools
The netback worker pool code is in netback.c and uses "struct xen_netbk" and
xen_netbk_*.
The network device interface is in interface.c and uses "struct xenvif" and
xenvif_*.
Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
---
drivers/net/xen-netback/common.h | 69 +++-------
drivers/net/xen-netback/interface.c | 227 ++++++++++++++++++++++++++++++-
drivers/net/xen-netback/netback.c | 259 ++++++-----------------------------
drivers/net/xen-netback/xenbus.c | 11 +-
4 files changed, 289 insertions(+), 277 deletions(-)
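As a rough picture of the layering this refactor aims for, here is a small user-space C sketch (placeholder types and trivial bodies, nothing from the kernel tree): the device-facing xenvif_* side reaches the worker pool only through a narrow xen_netbk_* surface, mirroring the split between interface.c and netback.c.

/* Stand-in illustration: the "interface.c" layer treats the pool as an
 * opaque type plus a few xen_netbk_* calls, and the pool layer never
 * touches net_device details. */
#include <stdio.h>

struct xen_netbk;				/* opaque to the vif layer */

struct xenvif {
	struct xen_netbk *netbk;		/* pool servicing this vif */
	int pending_tx;				/* stand-in for ring state */
};

/* --- narrow "netback.c" surface exposed to the vif layer --- */
struct xen_netbk { int scheduled; };

void xen_netbk_schedule_xenvif(struct xenvif *vif)
{
	vif->netbk->scheduled = 1;
	printf("vif scheduled on its pool\n");
}

/* --- "interface.c" side, built only on that surface --- */
void xenvif_schedule_work(struct xenvif *vif)
{
	if (vif->pending_tx)			/* ~ RING_FINAL_CHECK_FOR_REQUESTS() */
		xen_netbk_schedule_xenvif(vif);
}

int main(void)
{
	struct xen_netbk pool = { 0 };
	struct xenvif vif = { .netbk = &pool, .pending_tx = 1 };

	xenvif_schedule_work(&vif);
	return 0;
}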
diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h
index ebe93fb..b998a27 100644
--- a/drivers/net/xen-netback/common.h
+++ b/drivers/net/xen-netback/common.h
@@ -109,16 +109,6 @@ struct xenvif {
wait_queue_head_t waiting_to_free;
};
-/*
- * Implement our own carrier flag: the network stack's version causes delays
- * when the carrier is re-enabled (in particular, dev_activate() may not
- * immediately be called, which can cause packet loss; also the etherbridge
- * can be rather lazy in activating its port).
- */
-#define xenvif_carrier_on(xenvif) ((xenvif)->carrier = 1)
-#define xenvif_carrier_off(xenvif) ((xenvif)->carrier = 0)
-#define xenvif_carrier_ok(xenvif) ((xenvif)->carrier)
-
enum {
NETBK_DONT_COPY_SKB,
NETBK_DELAYED_COPY_SKB,
@@ -127,63 +117,40 @@ enum {
extern int netbk_copy_skb_mode;
-struct backend_info {
- struct xenbus_device *dev;
- struct xenvif *vif;
- enum xenbus_state frontend_state;
- struct xenbus_watch hotplug_status_watch;
- int have_hotplug_status_watch:1;
-};
-
-#define NET_TX_RING_SIZE __RING_SIZE((struct xen_netif_tx_sring *)0, PAGE_SIZE)
-#define NET_RX_RING_SIZE __RING_SIZE((struct xen_netif_rx_sring *)0, PAGE_SIZE)
-
-void xenvif_disconnect(struct xenvif *vif);
+#define XEN_NETIF_TX_RING_SIZE __RING_SIZE((struct xen_netif_tx_sring *)0, PAGE_SIZE)
+#define XEN_NETIF_RX_RING_SIZE __RING_SIZE((struct xen_netif_rx_sring *)0, PAGE_SIZE)
-void xenvif_set_features(struct xenvif *vif);
struct xenvif *xenvif_alloc(struct device *parent,
domid_t domid,
unsigned int handle);
+
int xenvif_connect(struct xenvif *vif, unsigned long tx_ring_ref,
unsigned long rx_ring_ref, unsigned int evtchn);
+void xenvif_disconnect(struct xenvif *vif);
-static inline void xenvif_get(struct xenvif *vif)
-{
- atomic_inc(&vif->refcnt);
-}
-
-static inline void xenvif_put(struct xenvif *vif)
-{
- if (atomic_dec_and_test(&vif->refcnt))
- wake_up(&vif->waiting_to_free);
-}
+void xenvif_get(struct xenvif *vif);
+void xenvif_put(struct xenvif *vif);
int xenvif_xenbus_init(void);
-#define xenvif_schedulable(vif) \
- (netif_running((vif)->dev) && xenvif_carrier_ok(vif))
+int xenvif_schedulable(struct xenvif *vif);
void xenvif_schedule_work(struct xenvif *vif);
-void xenvif_deschedule_work(struct xenvif *vif);
-
-int xenvif_start_xmit(struct sk_buff *skb, struct net_device *dev);
-struct net_device_stats *xenvif_get_stats(struct net_device *dev);
-irqreturn_t xenvif_interrupt(int irq, void *dev_id);
-
-static inline int netbk_can_queue(struct net_device *dev)
-{
- struct xenvif *vif = netdev_priv(dev);
- return vif->can_queue;
-}
-static inline int netbk_can_sg(struct net_device *dev)
-{
- struct xenvif *vif = netdev_priv(dev);
- return vif->can_sg;
-}
+int xenvif_queue_full(struct xenvif *vif);
/* (De)Register a xenvif with the netback backend. */
void xen_netbk_add_xenvif(struct xenvif *vif);
void xen_netbk_remove_xenvif(struct xenvif *vif);
+/* */
+void xen_netbk_schedule_xenvif(struct xenvif *vif);
+void xen_netbk_deschedule_xenfif(struct xenvif *vif);
+
+/* */
+unsigned int xen_netbk_count_skb_slots(struct xenvif *vif, struct sk_buff *skb);
+
+/* */
+void xen_netbk_queue_tx_skb(struct xenvif *vif, struct sk_buff *skb);
+
#endif /* __XEN_NETBACK__COMMON_H__ */
diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c
index cfbb3cc..c906c79 100644
--- a/drivers/net/xen-netback/interface.c
+++ b/drivers/net/xen-netback/interface.c
@@ -36,7 +36,221 @@
#include <xen/events.h>
#include <asm/xen/hypercall.h>
-#define NETBK_QUEUE_LENGTH 32
+#define XENVIF_QUEUE_LENGTH 32
+
+void xenvif_get(struct xenvif *vif)
+{
+ atomic_inc(&vif->refcnt);
+}
+
+void xenvif_put(struct xenvif *vif)
+{
+ if (atomic_dec_and_test(&vif->refcnt))
+ wake_up(&vif->waiting_to_free);
+}
+
+static int xenvif_max_required_rx_slots(struct xenvif *vif)
+{
+ if (vif->can_sg || vif->gso || vif->gso_prefix)
+ return MAX_SKB_FRAGS + 2; /* header + extra_info + frags */
+ return 1; /* all in one */
+}
+
+int xenvif_queue_full(struct xenvif *vif)
+{
+ RING_IDX peek = vif->rx_req_cons_peek;
+ RING_IDX needed = xenvif_max_required_rx_slots(vif);
+
+ return ((vif->rx.sring->req_prod - peek) < needed) ||
+ ((vif->rx.rsp_prod_pvt + XEN_NETIF_RX_RING_SIZE - peek) < needed);
+}
+
+/*
+ * Implement our own carrier flag: the network stack's version causes delays
+ * when the carrier is re-enabled (in particular, dev_activate() may not
+ * immediately be called, which can cause packet loss; also the etherbridge
+ * can be rather lazy in activating its port).
+ */
+static void xenvif_carrier_on(struct xenvif *vif)
+{
+ vif->carrier = 1;
+}
+static void xenvif_carrier_off(struct xenvif *vif)
+{
+ vif->carrier = 0;
+}
+static int xenvif_carrier_ok(struct xenvif *vif)
+{
+ return vif->carrier;
+}
+
+int xenvif_schedulable(struct xenvif *vif)
+{
+ return netif_running(vif->dev) && xenvif_carrier_ok(vif);
+}
+
+static irqreturn_t xenvif_interrupt(int irq, void *dev_id)
+{
+ struct xenvif *vif = dev_id;
+
+ if (vif->netbk == NULL)
+ return IRQ_NONE;
+
+ xen_netbk_schedule_xenvif(vif);
+
+ if (xenvif_schedulable(vif) && !xenvif_queue_full(vif))
+ netif_wake_queue(vif->dev);
+
+ return IRQ_HANDLED;
+}
+
+/* TODO: move to networking core */
+static struct sk_buff *xenvif_copy_skb(struct sk_buff *skb)
+{
+ struct skb_shared_info *ninfo;
+ struct sk_buff *nskb;
+ unsigned long offset;
+ int ret;
+ int len;
+ int headlen;
+
+ BUG_ON(skb_shinfo(skb)->frag_list != NULL);
+
+ nskb = alloc_skb(SKB_MAX_HEAD(0), GFP_ATOMIC | __GFP_NOWARN);
+ if (unlikely(!nskb))
+ goto err;
+
+ skb_reserve(nskb, NET_SKB_PAD + NET_IP_ALIGN);
+ headlen = skb_end_pointer(nskb) - nskb->data;
+ if (headlen > skb_headlen(skb))
+ headlen = skb_headlen(skb);
+ ret = skb_copy_bits(skb, 0, __skb_put(nskb, headlen), headlen);
+ BUG_ON(ret);
+
+ ninfo = skb_shinfo(nskb);
+ ninfo->gso_size = skb_shinfo(skb)->gso_size;
+ ninfo->gso_type = skb_shinfo(skb)->gso_type;
+
+ offset = headlen;
+ len = skb->len - headlen;
+
+ nskb->len = skb->len;
+ nskb->data_len = len;
+ nskb->truesize += len;
+
+ while (len) {
+ struct page *page;
+ int copy;
+ int zero;
+
+ if (unlikely(ninfo->nr_frags >= MAX_SKB_FRAGS)) {
+ dump_stack();
+ goto err_free;
+ }
+
+ copy = len >= PAGE_SIZE ? PAGE_SIZE : len;
+ zero = len >= PAGE_SIZE ? 0 : __GFP_ZERO;
+
+ page = alloc_page(GFP_ATOMIC | __GFP_NOWARN | zero);
+ if (unlikely(!page))
+ goto err_free;
+
+ ret = skb_copy_bits(skb, offset, page_address(page), copy);
+ BUG_ON(ret);
+
+ ninfo->frags[ninfo->nr_frags].page = page;
+ ninfo->frags[ninfo->nr_frags].page_offset = 0;
+ ninfo->frags[ninfo->nr_frags].size = copy;
+ ninfo->nr_frags++;
+
+ offset += copy;
+ len -= copy;
+ }
+
+#ifdef NET_SKBUFF_DATA_USES_OFFSET
+ offset = 0;
+#else
+ offset = nskb->data - skb->data;
+#endif
+
+ nskb->transport_header = skb->transport_header + offset;
+ nskb->network_header = skb->network_header + offset;
+ nskb->mac_header = skb->mac_header + offset;
+
+ return nskb;
+
+ err_free:
+ kfree_skb(nskb);
+ err:
+ return NULL;
+}
+
+static int xenvif_start_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+ struct xenvif *vif = netdev_priv(dev);
+
+ BUG_ON(skb->dev != dev);
+
+ if (vif->netbk == NULL)
+ goto drop;
+
+ /* Drop the packet if the target domain has no receive buffers. */
+ if (unlikely(!xenvif_schedulable(vif) || xenvif_queue_full(vif)))
+ goto drop;
+
+ /*
+ * XXX For now we also copy skbuffs whose head crosses a page
+ * boundary, because netbk_gop_skb can't handle them.
+ */
+ if ((skb_headlen(skb) + offset_in_page(skb->data)) >= PAGE_SIZE) {
+ struct sk_buff *nskb = xenvif_copy_skb(skb);
+ if (unlikely(nskb == NULL))
+ goto drop;
+ /* Copy only the header fields we use in this driver. */
+ nskb->dev = skb->dev;
+ nskb->ip_summed = skb->ip_summed;
+ dev_kfree_skb(skb);
+ skb = nskb;
+ }
+
+ /* Reserve ring slots for the worst-case number of fragments. */
+ vif->rx_req_cons_peek += xen_netbk_count_skb_slots(vif, skb);
+ xenvif_get(vif);
+
+ if (vif->can_queue && xenvif_queue_full(vif)) {
+ vif->rx.sring->req_event = vif->rx_req_cons_peek +
+ xenvif_max_required_rx_slots(vif);
+ mb(); /* request notification /then/ check & stop the queue */
+ if (xenvif_queue_full(vif))
+ netif_stop_queue(dev);
+ }
+
+ xen_netbk_queue_tx_skb(vif, skb);
+
+ return 0;
+
+ drop:
+ vif->stats.tx_dropped++;
+ dev_kfree_skb(skb);
+ return 0;
+}
+
+static struct net_device_stats *xenvif_get_stats(struct net_device *dev)
+{
+ struct xenvif *vif = netdev_priv(dev);
+ return &vif->stats;
+}
+
+void xenvif_schedule_work(struct xenvif *vif)
+{
+ int more_to_do;
+
+ RING_FINAL_CHECK_FOR_REQUESTS(&vif->tx, more_to_do);
+
+ if (more_to_do)
+ xen_netbk_schedule_xenvif(vif);
+}
+
static void xenvif_up(struct xenvif *vif)
{
@@ -48,7 +262,7 @@ static void xenvif_up(struct xenvif *vif)
static void xenvif_down(struct xenvif *vif)
{
disable_irq(vif->irq);
- xenvif_deschedule_work(vif);
+ xen_netbk_deschedule_xenfif(vif);
xen_netbk_remove_xenvif(vif);
}
@@ -73,7 +287,8 @@ static int xenvif_close(struct net_device *dev)
static int xenvif_change_mtu(struct net_device *dev, int mtu)
{
- int max = netbk_can_sg(dev) ? 65535 - ETH_HLEN : ETH_DATA_LEN;
+ struct xenvif *vif = netdev_priv(dev);
+ int max = vif->can_sg ? 65535 - ETH_HLEN : ETH_DATA_LEN;
if (mtu > max)
return -EINVAL;
@@ -81,7 +296,7 @@ static int xenvif_change_mtu(struct net_device *dev, int mtu)
return 0;
}
-void xenvif_set_features(struct xenvif *vif)
+static void xenvif_set_features(struct xenvif *vif)
{
struct net_device *dev = vif->dev;
int features = dev->features;
@@ -256,7 +471,7 @@ struct xenvif *xenvif_alloc(struct device *parent, domid_t domid,
xenvif_set_features(vif);
SET_ETHTOOL_OPS(dev, &xenvif_ethtool_ops);
- dev->tx_queue_len = NETBK_QUEUE_LENGTH;
+ dev->tx_queue_len = XENVIF_QUEUE_LENGTH;
/*
* Initialise a dummy MAC address. We choose the numerically
@@ -352,6 +567,8 @@ int xenvif_connect(struct xenvif *vif, unsigned long tx_ring_ref,
if (vif->irq)
return 0;
+ xenvif_set_features(vif);
+
vif->tx_comms_area = alloc_vm_area(PAGE_SIZE);
if (vif->tx_comms_area == NULL)
return -ENOMEM;
diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c
index 4f04d2d..1b7005c 100644
--- a/drivers/net/xen-netback/netback.c
+++ b/drivers/net/xen-netback/netback.c
@@ -131,10 +131,10 @@ struct xen_netbk {
* MAX_BUFFER_OFFSET of 4096 the worst case is that each
* head/fragment uses 2 copy operation.
*/
- struct gnttab_copy grant_copy_op[2*NET_RX_RING_SIZE];
+ struct gnttab_copy grant_copy_op[2*XEN_NETIF_RX_RING_SIZE];
unsigned char rx_notify[NR_IRQS];
- u16 notify_list[NET_RX_RING_SIZE];
- struct netbk_rx_meta meta[2*NET_RX_RING_SIZE];
+ u16 notify_list[XEN_NETIF_RX_RING_SIZE];
+ struct netbk_rx_meta meta[2*XEN_NETIF_RX_RING_SIZE];
};
static struct xen_netbk *xen_netbk;
@@ -164,7 +164,9 @@ void xen_netbk_add_xenvif(struct xenvif *vif)
void xen_netbk_remove_xenvif(struct xenvif *vif)
{
- atomic_dec(&vif->netbk->netfront_count);
+ struct xen_netbk *netbk = vif->netbk;
+ vif->netbk = NULL;
+ atomic_dec(&netbk->netfront_count);
}
static void xen_netbk_idx_release(struct xen_netbk *netbk, u16 pending_idx);
@@ -275,7 +277,7 @@ MODULE_PARM_DESC(netback_kthread, "Use kernel thread to replace tasklet");
* dir indicates the data direction.
* rx: 1, tx: 0.
*/
-static inline void xen_netbk_bh_handler(struct xen_netbk *netbk, int dir)
+void xen_netbk_bh_handler(struct xen_netbk *netbk, int dir)
{
if (MODPARM_netback_kthread)
wake_up(&netbk->kthread.netbk_action_wq);
@@ -285,110 +287,6 @@ static inline void xen_netbk_bh_handler(struct xen_netbk *netbk, int dir)
tasklet_schedule(&netbk->tasklet.net_tx_tasklet);
}
-static inline void maybe_schedule_tx_action(struct xen_netbk *netbk)
-{
- smp_mb();
- if ((nr_pending_reqs(netbk) < (MAX_PENDING_REQS/2)) &&
- !list_empty(&netbk->net_schedule_list))
- xen_netbk_bh_handler(netbk, 0);
-}
-
-static struct sk_buff *netbk_copy_skb(struct sk_buff *skb)
-{
- struct skb_shared_info *ninfo;
- struct sk_buff *nskb;
- unsigned long offset;
- int ret;
- int len;
- int headlen;
-
- BUG_ON(skb_shinfo(skb)->frag_list != NULL);
-
- nskb = alloc_skb(SKB_MAX_HEAD(0), GFP_ATOMIC | __GFP_NOWARN);
- if (unlikely(!nskb))
- goto err;
-
- skb_reserve(nskb, NET_SKB_PAD + NET_IP_ALIGN);
- headlen = skb_end_pointer(nskb) - nskb->data;
- if (headlen > skb_headlen(skb))
- headlen = skb_headlen(skb);
- ret = skb_copy_bits(skb, 0, __skb_put(nskb, headlen), headlen);
- BUG_ON(ret);
-
- ninfo = skb_shinfo(nskb);
- ninfo->gso_size = skb_shinfo(skb)->gso_size;
- ninfo->gso_type = skb_shinfo(skb)->gso_type;
-
- offset = headlen;
- len = skb->len - headlen;
-
- nskb->len = skb->len;
- nskb->data_len = len;
- nskb->truesize += len;
-
- while (len) {
- struct page *page;
- int copy;
- int zero;
-
- if (unlikely(ninfo->nr_frags >= MAX_SKB_FRAGS)) {
- dump_stack();
- goto err_free;
- }
-
- copy = len >= PAGE_SIZE ? PAGE_SIZE : len;
- zero = len >= PAGE_SIZE ? 0 : __GFP_ZERO;
-
- page = alloc_page(GFP_ATOMIC | __GFP_NOWARN | zero);
- if (unlikely(!page))
- goto err_free;
-
- ret = skb_copy_bits(skb, offset, page_address(page), copy);
- BUG_ON(ret);
-
- ninfo->frags[ninfo->nr_frags].page = page;
- ninfo->frags[ninfo->nr_frags].page_offset = 0;
- ninfo->frags[ninfo->nr_frags].size = copy;
- ninfo->nr_frags++;
-
- offset += copy;
- len -= copy;
- }
-
-#ifdef NET_SKBUFF_DATA_USES_OFFSET
- offset = 0;
-#else
- offset = nskb->data - skb->data;
-#endif
-
- nskb->transport_header = skb->transport_header + offset;
- nskb->network_header = skb->network_header + offset;
- nskb->mac_header = skb->mac_header + offset;
-
- return nskb;
-
- err_free:
- kfree_skb(nskb);
- err:
- return NULL;
-}
-
-static inline int xenvif_max_required_rx_slots(struct xenvif *vif)
-{
- if (vif->can_sg || vif->gso || vif->gso_prefix)
- return MAX_SKB_FRAGS + 2; /* header + extra_info + frags */
- return 1; /* all in one */
-}
-
-static inline int xenvif_queue_full(struct xenvif *vif)
-{
- RING_IDX peek = vif->rx_req_cons_peek;
- RING_IDX needed = xenvif_max_required_rx_slots(vif);
-
- return ((vif->rx.sring->req_prod - peek) < needed) ||
- ((vif->rx.rsp_prod_pvt + NET_RX_RING_SIZE - peek) < needed);
-}
-
/*
* Returns true if we should start a new receive buffer instead of
* adding 'size' bytes to a buffer which currently contains 'offset'
@@ -434,7 +332,7 @@ static bool start_new_rx_buffer(int offset, unsigned long size, int head)
* the guest. This function is essentially a dry run of
* netbk_gop_frag_copy.
*/
-static unsigned int count_skb_slots(struct sk_buff *skb, struct xenvif *vif)
+unsigned int xen_netbk_count_skb_slots(struct xenvif *vif, struct sk_buff *skb)
{
unsigned int count = 1;
int i, copy_off = 0;
@@ -468,60 +366,6 @@ static unsigned int count_skb_slots(struct sk_buff *skb, struct xenvif *vif)
return count;
}
-int xenvif_start_xmit(struct sk_buff *skb, struct net_device *dev)
-{
- struct xenvif *vif = netdev_priv(dev);
- struct xen_netbk *netbk;
-
- BUG_ON(skb->dev != dev);
-
- if (vif->netbk == NULL)
- goto drop;
-
- netbk = vif->netbk;
-
- /* Drop the packet if the target domain has no receive buffers. */
- if (unlikely(!xenvif_schedulable(vif) || xenvif_queue_full(vif)))
- goto drop;
-
- /*
- * XXX For now we also copy skbuffs whose head crosses a page
- * boundary, because netbk_gop_skb can't handle them.
- */
- if ((skb_headlen(skb) + offset_in_page(skb->data)) >= PAGE_SIZE) {
- struct sk_buff *nskb = netbk_copy_skb(skb);
- if (unlikely(nskb == NULL))
- goto drop;
- /* Copy only the header fields we use in this driver. */
- nskb->dev = skb->dev;
- nskb->ip_summed = skb->ip_summed;
- dev_kfree_skb(skb);
- skb = nskb;
- }
-
- /* Reserve ring slots for the worst-case number of fragments. */
- vif->rx_req_cons_peek += count_skb_slots(skb, vif);
- xenvif_get(vif);
-
- if (netbk_can_queue(dev) && xenvif_queue_full(vif)) {
- vif->rx.sring->req_event = vif->rx_req_cons_peek +
- xenvif_max_required_rx_slots(vif);
- mb(); /* request notification /then/ check & stop the queue */
- if (xenvif_queue_full(vif))
- netif_stop_queue(dev);
- }
- skb_queue_tail(&netbk->rx_queue, skb);
-
- xen_netbk_bh_handler(netbk, 1);
-
- return 0;
-
- drop:
- vif->stats.tx_dropped++;
- dev_kfree_skb(skb);
- return 0;
-}
-
struct netrx_pending_operations {
unsigned copy_prod, copy_cons;
unsigned meta_prod, meta_cons;
@@ -780,7 +624,7 @@ static void xen_netbk_rx_action(unsigned long data)
__skb_queue_tail(&rxq, skb);
/* Filled the batch queue? */
- if (count + MAX_SKB_FRAGS >= NET_RX_RING_SIZE)
+ if (count + MAX_SKB_FRAGS >= XEN_NETIF_RX_RING_SIZE)
break;
}
@@ -887,9 +731,12 @@ static void xen_netbk_rx_action(unsigned long data)
xen_netbk_bh_handler(netbk, 1);
}
-static void net_alarm(unsigned long data)
+void xen_netbk_queue_tx_skb(struct xenvif *vif, struct sk_buff *skb)
{
- struct xen_netbk *netbk = (struct xen_netbk *)data;
+ struct xen_netbk *netbk = vif->netbk;
+
+ skb_queue_tail(&netbk->rx_queue, skb);
+
xen_netbk_bh_handler(netbk, 1);
}
@@ -899,10 +746,10 @@ static void xen_netbk_tx_pending_timeout(unsigned long data)
xen_netbk_bh_handler(netbk, 0);
}
-struct net_device_stats *xenvif_get_stats(struct net_device *dev)
+static void xen_netbk_alarm(unsigned long data)
{
- struct xenvif *vif = netdev_priv(dev);
- return &vif->stats;
+ struct xen_netbk *netbk = (struct xen_netbk *)data;
+ xen_netbk_bh_handler(netbk, 1);
}
static int __on_net_schedule_list(struct xenvif *vif)
@@ -940,13 +787,13 @@ out:
return vif;
}
-static void add_to_net_schedule_list_tail(struct xenvif *vif)
+void xen_netbk_schedule_xenvif(struct xenvif *vif)
{
unsigned long flags;
struct xen_netbk *netbk = vif->netbk;
if (__on_net_schedule_list(vif))
- return;
+ goto kick;
spin_lock_irqsave(&netbk->net_schedule_list_lock, flags);
if (!__on_net_schedule_list(vif) &&
@@ -955,22 +802,15 @@ static void add_to_net_schedule_list_tail(struct xenvif *vif)
xenvif_get(vif);
}
spin_unlock_irqrestore(&netbk->net_schedule_list_lock, flags);
-}
-
-void xenvif_schedule_work(struct xenvif *vif)
-{
- struct xen_netbk *netbk = vif->netbk;
- int more_to_do;
-
- RING_FINAL_CHECK_FOR_REQUESTS(&vif->tx, more_to_do);
- if (more_to_do) {
- add_to_net_schedule_list_tail(vif);
- maybe_schedule_tx_action(netbk);
- }
+kick:
+ smp_mb();
+ if ((nr_pending_reqs(netbk) < (MAX_PENDING_REQS/2)) &&
+ !list_empty(&netbk->net_schedule_list))
+ xen_netbk_bh_handler(netbk, 0);
}
-void xenvif_deschedule_work(struct xenvif *vif)
+void xen_netbk_deschedule_xenfif(struct xenvif *vif)
{
struct xen_netbk *netbk = vif->netbk;
spin_lock_irq(&netbk->net_schedule_list_lock);
@@ -1019,7 +859,7 @@ static inline void xen_netbk_tx_action_dealloc(struct xen_netbk *netbk)
struct gnttab_unmap_grant_ref *gop;
u16 pending_idx;
pending_ring_idx_t dc, dp;
- struct xen_netif *netif;
+ struct xenvif *vif;
int ret;
LIST_HEAD(list);
@@ -1077,7 +917,7 @@ static inline void xen_netbk_tx_action_dealloc(struct xen_netbk *netbk)
pending_idx = inuse - netbk->pending_inuse;
- pending_tx_info[pending_idx].netif->nr_copied_skbs++;
+ pending_tx_info[pending_idx].vif->nr_copied_skbs++;
switch (copy_pending_req(netbk, pending_idx)) {
case 0:
@@ -1101,9 +941,9 @@ static inline void xen_netbk_tx_action_dealloc(struct xen_netbk *netbk)
pending_tx_info = netbk->pending_tx_info;
pending_idx = inuse - netbk->pending_inuse;
- netif = pending_tx_info[pending_idx].netif;
+ vif = pending_tx_info[pending_idx].vif;
- make_tx_response(netif, &pending_tx_info[pending_idx].req,
+ make_tx_response(vif, &pending_tx_info[pending_idx].req,
XEN_NETIF_RSP_OKAY);
/* Ready for next use. */
@@ -1200,7 +1040,7 @@ static struct gnttab_map_grant_ref *xen_netbk_get_requests(struct xen_netbk *net
gnttab_set_map_op(mop++, idx_to_kaddr(netbk, pending_idx),
GNTMAP_host_map | GNTMAP_readonly,
- txp->gref, netif->domid);
+ txp->gref, vif->domid);
memcpy(&pending_tx_info[pending_idx].req, txp, sizeof(*txp));
xenvif_get(vif);
@@ -1319,9 +1159,9 @@ static void xen_netbk_fill_frags(struct xen_netbk *netbk, struct sk_buff *skb)
}
}
-int netbk_get_extras(struct xenvif *vif,
- struct xen_netif_extra_info *extras,
- int work_to_do)
+static int xen_netbk_get_extras(struct xenvif *vif,
+ struct xen_netif_extra_info *extras,
+ int work_to_do)
{
struct xen_netif_extra_info extra;
RING_IDX cons = vif->tx.req_cons;
@@ -1522,8 +1362,8 @@ static unsigned xen_netbk_tx_build_mops(struct xen_netbk *netbk)
memset(extras, 0, sizeof(extras));
if (txreq.flags & XEN_NETTXF_extra_info) {
- work_to_do = netbk_get_extras(vif, extras,
- work_to_do);
+ work_to_do = xen_netbk_get_extras(vif, extras,
+ work_to_do);
idx = vif->tx.req_cons;
if (unlikely(work_to_do < 0)) {
netbk_tx_err(vif, &txreq, idx);
@@ -1584,7 +1424,7 @@ static unsigned xen_netbk_tx_build_mops(struct xen_netbk *netbk)
gnttab_set_map_op(mop, idx_to_kaddr(netbk, pending_idx),
GNTMAP_host_map | GNTMAP_readonly,
- txreq.gref, netif->domid);
+ txreq.gref, vif->domid);
mop++;
memcpy(&netbk->pending_tx_info[pending_idx].req,
@@ -1608,7 +1448,7 @@ static unsigned xen_netbk_tx_build_mops(struct xen_netbk *netbk)
netbk->pending_cons++;
- mop = xen_netbk_get_requests(netbk, netif, skb, txfrags, mop);
+ mop = xen_netbk_get_requests(netbk, vif, skb, txfrags, mop);
vif->tx.req_cons = idx;
xenvif_schedule_work(vif);
@@ -1705,7 +1545,7 @@ static void xen_netbk_tx_action(unsigned long data)
unsigned nr_mops;
int ret;
- net_tx_action_dealloc(netbk);
+ xen_netbk_tx_action_dealloc(netbk);
nr_mops = xen_netbk_tx_build_mops(netbk);
@@ -1738,7 +1578,7 @@ static void xen_netbk_idx_release(struct xen_netbk *netbk, u16 pending_idx)
spin_lock_irqsave(&_lock, flags);
index = pending_index(netbk->dealloc_prod);
netbk->dealloc_ring[index] = pending_idx;
- /* Sync with net_tx_action_dealloc: insert idx /then/ incr producer. */
+ /* Sync with xen_netbk_tx_action_dealloc: insert idx /then/ incr producer. */
smp_wmb();
netbk->dealloc_prod++;
spin_unlock_irqrestore(&_lock, flags);
@@ -1757,23 +1597,6 @@ static void netif_page_release(struct page *page, unsigned int order)
xen_netbk_idx_release(&xen_netbk[group], idx);
}
-irqreturn_t xenvif_interrupt(int irq, void *dev_id)
-{
- struct xenvif *vif = dev_id;
- struct xen_netbk *netbk = vif->netbk;
-
- if (netbk == NULL)
- return IRQ_NONE;
-
- add_to_net_schedule_list_tail(vif);
- maybe_schedule_tx_action(netbk);
-
- if (xenvif_schedulable(vif) && !xenvif_queue_full(vif))
- netif_wake_queue(vif->dev);
-
- return IRQ_HANDLED;
-}
-
static void make_tx_response(struct xenvif *vif,
struct xen_netif_tx_request *txp,
s8 st)
@@ -1887,12 +1710,12 @@ static int __init netback_init(void)
init_timer(&netbk->net_timer);
netbk->net_timer.data = (unsigned long)netbk;
- netbk->net_timer.function = net_alarm;
+ netbk->net_timer.function = xen_netbk_alarm;
init_timer(&netbk->netbk_tx_pending_timer);
netbk->netbk_tx_pending_timer.data = (unsigned long)netbk;
netbk->netbk_tx_pending_timer.function =
- netbk_tx_pending_timeout;
+ xen_netbk_tx_pending_timeout;
netbk->mmap_pages =
alloc_empty_pages_and_pagevec(MAX_PENDING_REQS);
diff --git a/drivers/net/xen-netback/xenbus.c b/drivers/net/xen-netback/xenbus.c
index a6ad259..e854420 100644
--- a/drivers/net/xen-netback/xenbus.c
+++ b/drivers/net/xen-netback/xenbus.c
@@ -21,6 +21,14 @@
#include "common.h"
+struct backend_info {
+ struct xenbus_device *dev;
+ struct xenvif *vif;
+ enum xenbus_state frontend_state;
+ struct xenbus_watch hotplug_status_watch;
+ int have_hotplug_status_watch:1;
+};
+
static int connect_rings(struct backend_info *);
static void connect(struct backend_info *);
static void backend_create_xenvif(struct backend_info *be);
@@ -447,9 +455,6 @@ static int connect_rings(struct backend_info *be)
val = 0;
vif->csum = !val;
- /* Set dev->features */
- xenvif_set_features(vif);
-
/* Map the shared frame, irq etc. */
err = xenvif_connect(vif, tx_ring_ref, rx_ring_ref, evtchn);
if (err) {
--
1.7.4
From 788c58b652351558b0689faa1cb6cadd8a9cc1d6 Mon Sep 17 00:00:00 2001
From: Ian Campbell <ian.campbell@citrix.com>
Date: Fri, 21 Jan 2011 11:10:22 +0000
Subject: [PATCH 170/197] xen: netback: switch to kthread mode and drop tasklet mode
Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
---
drivers/net/xen-netback/netback.c | 108 +++++++++++-------------------------
1 files changed, 33 insertions(+), 75 deletions(-)
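The pattern this patch settles on, one worker thread per pool that parks on a wait queue and is kicked whenever rx/tx work appears, can be modelled in user space. The sketch below uses pthreads as a stand-in for kthread_create()/wake_up() and two boolean flags in place of rx_work_todo()/tx_work_todo(); it only illustrates the shape of xen_netbk_kthread(), it is not the kernel code.

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>
#include <unistd.h>

struct netbk_model {
	pthread_mutex_t lock;
	pthread_cond_t wq;		/* plays the role of netbk->wq */
	bool rx_work, tx_work, stop;
};

static void kick_thread(struct netbk_model *nb)	/* ~ xen_netbk_kick_thread() */
{
	pthread_mutex_lock(&nb->lock);
	pthread_cond_signal(&nb->wq);
	pthread_mutex_unlock(&nb->lock);
}

static void *worker(void *arg)			/* ~ xen_netbk_kthread() */
{
	struct netbk_model *nb = arg;

	pthread_mutex_lock(&nb->lock);
	while (!nb->stop) {
		/* wait_event_interruptible(wq, rx || tx || should_stop) */
		while (!nb->rx_work && !nb->tx_work && !nb->stop)
			pthread_cond_wait(&nb->wq, &nb->lock);
		if (nb->rx_work) { nb->rx_work = false; puts("rx action"); }
		if (nb->tx_work) { nb->tx_work = false; puts("tx action"); }
	}
	pthread_mutex_unlock(&nb->lock);
	return NULL;
}

int main(void)
{
	static struct netbk_model nb = {
		.lock = PTHREAD_MUTEX_INITIALIZER,
		.wq = PTHREAD_COND_INITIALIZER,
	};
	pthread_t task;

	pthread_create(&task, NULL, worker, &nb);

	pthread_mutex_lock(&nb.lock);
	nb.rx_work = true;			/* queue some rx work ...  */
	pthread_mutex_unlock(&nb.lock);
	kick_thread(&nb);			/* ... and kick the thread */

	sleep(1);

	pthread_mutex_lock(&nb.lock);
	nb.stop = true;
	pthread_mutex_unlock(&nb.lock);
	kick_thread(&nb);
	pthread_join(task, NULL);
	return 0;
}

Built with cc -pthread, the model prints one "rx action" before the worker is told to stop.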
diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c
index 1b7005c..73e35fd 100644
--- a/drivers/net/xen-netback/netback.c
+++ b/drivers/net/xen-netback/netback.c
@@ -84,17 +84,8 @@ union page_ext {
};
struct xen_netbk {
- union {
- struct {
- struct tasklet_struct net_tx_tasklet;
- struct tasklet_struct net_rx_tasklet;
- } tasklet;
-
- struct {
- wait_queue_head_t netbk_action_wq;
- struct task_struct *task;
- } kthread;
- };
+ wait_queue_head_t wq;
+ struct task_struct *task;
struct sk_buff_head rx_queue;
struct sk_buff_head tx_queue;
@@ -180,10 +171,6 @@ static struct xen_netif_rx_response *make_rx_response(struct xenvif *vif,
u16 size,
u16 flags);
-static void xen_netbk_tx_action(unsigned long data);
-
-static void xen_netbk_rx_action(unsigned long data);
-
static inline unsigned long idx_to_pfn(struct xen_netbk *netbk,
unsigned int idx)
{
@@ -268,23 +255,9 @@ MODULE_PARM_DESC(copy_skb, "Copy data received from netfront without netloop");
int netbk_copy_skb_mode;
-static int MODPARM_netback_kthread;
-module_param_named(netback_kthread, MODPARM_netback_kthread, bool, 0);
-MODULE_PARM_DESC(netback_kthread, "Use kernel thread to replace tasklet");
-
-/*
- * Netback bottom half handler.
- * dir indicates the data direction.
- * rx: 1, tx: 0.
- */
-void xen_netbk_bh_handler(struct xen_netbk *netbk, int dir)
+static void xen_netbk_kick_thread(struct xen_netbk *netbk)
{
- if (MODPARM_netback_kthread)
- wake_up(&netbk->kthread.netbk_action_wq);
- else if (dir)
- tasklet_schedule(&netbk->tasklet.net_rx_tasklet);
- else
- tasklet_schedule(&netbk->tasklet.net_tx_tasklet);
+ wake_up(&netbk->wq);
}
/*
@@ -587,10 +560,9 @@ struct skb_cb_overlay {
int meta_slots_used;
};
-static void xen_netbk_rx_action(unsigned long data)
+static void xen_netbk_rx_action(struct xen_netbk *netbk)
{
struct xenvif *vif = NULL;
- struct xen_netbk *netbk = (struct xen_netbk *)data;
s8 status;
u16 irq, flags;
struct xen_netif_rx_response *resp;
@@ -728,7 +700,7 @@ static void xen_netbk_rx_action(unsigned long data)
/* More work to do? */
if (!skb_queue_empty(&netbk->rx_queue) &&
!timer_pending(&netbk->net_timer))
- xen_netbk_bh_handler(netbk, 1);
+ xen_netbk_kick_thread(netbk);
}
void xen_netbk_queue_tx_skb(struct xenvif *vif, struct sk_buff *skb)
@@ -737,19 +709,19 @@ void xen_netbk_queue_tx_skb(struct xenvif *vif, struct sk_buff *skb)
skb_queue_tail(&netbk->rx_queue, skb);
- xen_netbk_bh_handler(netbk, 1);
+ xen_netbk_kick_thread(netbk);
}
static void xen_netbk_tx_pending_timeout(unsigned long data)
{
struct xen_netbk *netbk = (struct xen_netbk *)data;
- xen_netbk_bh_handler(netbk, 0);
+ xen_netbk_kick_thread(netbk);
}
static void xen_netbk_alarm(unsigned long data)
{
struct xen_netbk *netbk = (struct xen_netbk *)data;
- xen_netbk_bh_handler(netbk, 1);
+ xen_netbk_kick_thread(netbk);
}
static int __on_net_schedule_list(struct xenvif *vif)
@@ -807,7 +779,7 @@ kick:
smp_mb();
if ((nr_pending_reqs(netbk) < (MAX_PENDING_REQS/2)) &&
!list_empty(&netbk->net_schedule_list))
- xen_netbk_bh_handler(netbk, 0);
+ xen_netbk_kick_thread(netbk);
}
void xen_netbk_deschedule_xenfif(struct xenvif *vif)
@@ -1539,9 +1511,8 @@ static void xen_netbk_tx_submit(struct xen_netbk *netbk)
}
/* Called after netfront has transmitted */
-static void xen_netbk_tx_action(unsigned long data)
+static void xen_netbk_tx_action(struct xen_netbk *netbk)
{
- struct xen_netbk *netbk = (struct xen_netbk *)data;
unsigned nr_mops;
int ret;
@@ -1583,7 +1554,7 @@ static void xen_netbk_idx_release(struct xen_netbk *netbk, u16 pending_idx)
netbk->dealloc_prod++;
spin_unlock_irqrestore(&_lock, flags);
- xen_netbk_bh_handler(netbk, 0);
+ xen_netbk_kick_thread(netbk);
}
static void netif_page_release(struct page *page, unsigned int order)
@@ -1662,11 +1633,11 @@ static inline int tx_work_todo(struct xen_netbk *netbk)
return 0;
}
-static int xen_netbk_action_thread(void *data)
+static int xen_netbk_kthread(void *data)
{
struct xen_netbk *netbk = (struct xen_netbk *)data;
while (!kthread_should_stop()) {
- wait_event_interruptible(netbk->kthread.netbk_action_wq,
+ wait_event_interruptible(netbk->wq,
rx_work_todo(netbk)
|| tx_work_todo(netbk)
|| kthread_should_stop());
@@ -1676,10 +1647,10 @@ static int xen_netbk_action_thread(void *data)
break;
if (rx_work_todo(netbk))
- xen_netbk_rx_action((unsigned long)netbk);
+ xen_netbk_rx_action(netbk);
if (tx_work_todo(netbk))
- xen_netbk_tx_action((unsigned long)netbk);
+ xen_netbk_tx_action(netbk);
}
return 0;
@@ -1739,34 +1710,23 @@ static int __init netback_init(void)
for (i = 0; i < MAX_PENDING_REQS; i++)
netbk->pending_ring[i] = i;
- if (MODPARM_netback_kthread) {
- init_waitqueue_head(&netbk->kthread.netbk_action_wq);
- netbk->kthread.task =
- kthread_create(xen_netbk_action_thread,
- (void *)netbk,
- "netback/%u", group);
-
- if (!IS_ERR(netbk->kthread.task)) {
- kthread_bind(netbk->kthread.task, group);
- } else {
- printk(KERN_ALERT
- "kthread_run() fails at netback\n");
- free_empty_pages_and_pagevec(netbk->mmap_pages,
- MAX_PENDING_REQS);
- del_timer(&netbk->netbk_tx_pending_timer);
- del_timer(&netbk->net_timer);
- rc = PTR_ERR(netbk->kthread.task);
- goto failed_init;
- }
- } else {
- tasklet_init(&netbk->tasklet.net_tx_tasklet,
- xen_netbk_tx_action,
- (unsigned long)netbk);
- tasklet_init(&netbk->tasklet.net_rx_tasklet,
- xen_netbk_rx_action,
- (unsigned long)netbk);
+ init_waitqueue_head(&netbk->wq);
+ netbk->task = kthread_create(xen_netbk_kthread,
+ (void *)netbk,
+ "netback/%u", group);
+
+ if (IS_ERR(netbk->task)) {
+ printk(KERN_ALERT "kthread_run() fails at netback\n");
+ free_empty_pages_and_pagevec(netbk->mmap_pages,
+ MAX_PENDING_REQS);
+ del_timer(&netbk->netbk_tx_pending_timer);
+ del_timer(&netbk->net_timer);
+ rc = PTR_ERR(netbk->task);
+ goto failed_init;
}
+ kthread_bind(netbk->task, group);
+
INIT_LIST_HEAD(&netbk->pending_inuse_head);
INIT_LIST_HEAD(&netbk->net_schedule_list);
@@ -1774,8 +1734,7 @@ static int __init netback_init(void)
atomic_set(&netbk->netfront_count, 0);
- if (MODPARM_netback_kthread)
- wake_up_process(netbk->kthread.task);
+ wake_up_process(netbk->task);
}
netbk_copy_skb_mode = NETBK_DONT_COPY_SKB;
@@ -1800,8 +1759,7 @@ failed_init:
MAX_PENDING_REQS);
del_timer(&netbk->netbk_tx_pending_timer);
del_timer(&netbk->net_timer);
- if (MODPARM_netback_kthread)
- kthread_stop(netbk->kthread.task);
+ kthread_stop(netbk->task);
}
vfree(xen_netbk);
return rc;
--
1.7.4
From d5a6c1ac70a48a2478d938db19fb6e6a5afd89b1 Mon Sep 17 00:00:00 2001
From: Ian Campbell <ian.campbell@citrix.com>
Date: Sat, 11 Dec 2010 10:15:50 +0000
Subject: [PATCH 171/197] xen: netback: Make dependency on PageForeign conditional
When PageForeign is not available we fall back to a copying TX mode.
All uses of PageForeign are now gated with HAVE_XEN_PAGE_FOREIGN; this should
allow for easier removal of the dependency for upstream, e.g. using unifdef.
Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
---
drivers/net/xen-netback/common.h | 4 +
drivers/net/xen-netback/interface.c | 2 +
drivers/net/xen-netback/netback.c | 279 ++++++++++++++++++++++++++++++-----
drivers/net/xen-netback/xenbus.c | 2 +
4 files changed, 252 insertions(+), 35 deletions(-)
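A toy example of the gating pattern applied throughout the patch (placeholder functions, not the real netback paths): every PageForeign-dependent branch sits behind HAVE_XEN_PAGE_FOREIGN, so running unifdef -UHAVE_XEN_PAGE_FOREIGN over a file written this way leaves only the grant-copy fallback behind.

#include <stdio.h>

void tx_via_grant_map(void)  { puts("mapping TX mode (needs PageForeign)"); }
void tx_via_grant_copy(void) { puts("copying TX mode (fallback)"); }

static void transmit(void)
{
#ifdef HAVE_XEN_PAGE_FOREIGN
	/* PageForeign available: map the frontend's pages directly. */
	tx_via_grant_map();
#else
	/* No PageForeign: copy the frontend's data into local pages. */
	tx_via_grant_copy();
#endif
}

int main(void)
{
	transmit();
	return 0;
}

Compiling with -DHAVE_XEN_PAGE_FOREIGN takes the mapping path; without it only the copying path remains.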
diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h
index b998a27..8890825 100644
--- a/drivers/net/xen-netback/common.h
+++ b/drivers/net/xen-netback/common.h
@@ -95,7 +95,9 @@ struct xenvif {
struct timer_list credit_timeout;
/* Statistics */
+#ifdef HAVE_XEN_PAGE_FOREIGN
int nr_copied_skbs;
+#endif
int rx_gso_checksum_fixup;
/* Miscellaneous private stuff. */
@@ -109,6 +111,7 @@ struct xenvif {
wait_queue_head_t waiting_to_free;
};
+#ifdef HAVE_XEN_PAGE_FOREIGN
enum {
NETBK_DONT_COPY_SKB,
NETBK_DELAYED_COPY_SKB,
@@ -116,6 +119,7 @@ enum {
};
extern int netbk_copy_skb_mode;
+#endif
#define XEN_NETIF_TX_RING_SIZE __RING_SIZE((struct xen_netif_tx_sring *)0, PAGE_SIZE)
#define XEN_NETIF_RX_RING_SIZE __RING_SIZE((struct xen_netif_rx_sring *)0, PAGE_SIZE)
diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c
index c906c79..895b50b 100644
--- a/drivers/net/xen-netback/interface.c
+++ b/drivers/net/xen-netback/interface.c
@@ -365,10 +365,12 @@ static const struct xenvif_stat {
char name[ETH_GSTRING_LEN];
u16 offset;
} xenvif_stats[] = {
+#ifdef HAVE_XEN_PAGE_FOREIGN
{
"copied_skbs",
offsetof(struct xenvif, nr_copied_skbs)
},
+#endif
{
"rx_gso_checksum_fixup",
offsetof(struct xenvif, rx_gso_checksum_fixup)
diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c
index 73e35fd..60829d4 100644
--- a/drivers/net/xen-netback/netback.c
+++ b/drivers/net/xen-netback/netback.c
@@ -40,7 +40,9 @@
#include <net/tcp.h>
+#ifdef HAVE_XEN_PAGE_FOREIGN
#include <xen/balloon.h>
+#endif
#include <xen/events.h>
#include <xen/interface/memory.h>
@@ -59,10 +61,12 @@ struct netbk_rx_meta {
int gso_size;
};
+#ifdef HAVE_XEN_PAGE_FOREIGN
struct netbk_tx_pending_inuse {
struct list_head list;
unsigned long alloc_time;
};
+#endif
#define MAX_PENDING_REQS 256
@@ -91,16 +95,24 @@ struct xen_netbk {
struct sk_buff_head tx_queue;
struct timer_list net_timer;
+#ifdef HAVE_XEN_PAGE_FOREIGN
struct timer_list netbk_tx_pending_timer;
+#endif
+#ifdef HAVE_XEN_PAGE_FOREIGN
struct page **mmap_pages;
+#else
+ struct page *mmap_pages[MAX_PENDING_REQS];
+#endif
pending_ring_idx_t pending_prod;
pending_ring_idx_t pending_cons;
+#ifdef HAVE_XEN_PAGE_FOREIGN
pending_ring_idx_t dealloc_prod;
pending_ring_idx_t dealloc_cons;
struct list_head pending_inuse_head;
+#endif
struct list_head net_schedule_list;
/* Protect the net_schedule_list in netif. */
@@ -109,13 +121,20 @@ struct xen_netbk {
atomic_t netfront_count;
struct pending_tx_info pending_tx_info[MAX_PENDING_REQS];
+#ifdef HAVE_XEN_PAGE_FOREIGN
struct netbk_tx_pending_inuse pending_inuse[MAX_PENDING_REQS];
struct gnttab_unmap_grant_ref tx_unmap_ops[MAX_PENDING_REQS];
struct gnttab_map_grant_ref tx_map_ops[MAX_PENDING_REQS];
grant_handle_t grant_tx_handle[MAX_PENDING_REQS];
+#else
+ struct gnttab_copy tx_copy_ops[MAX_PENDING_REQS];
+#endif
+
u16 pending_ring[MAX_PENDING_REQS];
+#ifdef HAVE_XEN_PAGE_FOREIGN
u16 dealloc_ring[MAX_PENDING_REQS];
+#endif
/*
* Each head or fragment can be up to 4096 bytes. Given
@@ -184,9 +203,10 @@ static inline unsigned long idx_to_kaddr(struct xen_netbk *netbk,
}
/* extra field used in struct page */
-static inline void set_page_ext(struct page *pg,
- unsigned int group, unsigned int idx)
+static inline void set_page_ext(struct page *pg, struct xen_netbk *netbk,
+ unsigned int idx)
{
+ unsigned int group = netbk - xen_netbk;
union page_ext ext = { .e = { .group = group + 1, .idx = idx } };
BUILD_BUG_ON(sizeof(ext) > sizeof(ext.mapping));
@@ -200,8 +220,10 @@ static int get_page_ext(struct page *pg,
struct xen_netbk *netbk;
unsigned int group, idx;
+#ifdef HAVE_XEN_PAGE_FOREIGN
if (!PageForeign(pg))
return 0;
+#endif
group = ext.e.group - 1;
@@ -210,8 +232,10 @@ static int get_page_ext(struct page *pg,
netbk = &xen_netbk[group];
+#ifdef HAVE_XEN_PAGE_FOREIGN
if (netbk->mmap_pages == NULL)
return 0;
+#endif
idx = ext.e.idx;
@@ -248,12 +272,14 @@ static inline pending_ring_idx_t nr_pending_reqs(struct xen_netbk *netbk)
netbk->pending_prod + netbk->pending_cons;
}
+#ifdef HAVE_XEN_PAGE_FOREIGN
/* Setting this allows the safe use of this driver without netloop. */
static int MODPARM_copy_skb = 1;
module_param_named(copy_skb, MODPARM_copy_skb, bool, 0);
MODULE_PARM_DESC(copy_skb, "Copy data received from netfront without netloop");
int netbk_copy_skb_mode;
+#endif
static void xen_netbk_kick_thread(struct xen_netbk *netbk)
{
@@ -712,11 +738,13 @@ void xen_netbk_queue_tx_skb(struct xenvif *vif, struct sk_buff *skb)
xen_netbk_kick_thread(netbk);
}
+#ifdef HAVE_XEN_PAGE_FOREIGN
static void xen_netbk_tx_pending_timeout(unsigned long data)
{
struct xen_netbk *netbk = (struct xen_netbk *)data;
xen_netbk_kick_thread(netbk);
}
+#endif
static void xen_netbk_alarm(unsigned long data)
{
@@ -817,6 +845,7 @@ static void tx_credit_callback(unsigned long data)
xenvif_schedule_work(vif);
}
+#ifdef HAVE_XEN_PAGE_FOREIGN
static inline int copy_pending_req(struct xen_netbk *netbk,
pending_ring_idx_t pending_idx)
{
@@ -824,7 +853,9 @@ static inline int copy_pending_req(struct xen_netbk *netbk,
netbk->grant_tx_handle[pending_idx],
&netbk->mmap_pages[pending_idx]);
}
+#endif
+#ifdef HAVE_XEN_PAGE_FOREIGN
static inline void xen_netbk_tx_action_dealloc(struct xen_netbk *netbk)
{
struct netbk_tx_pending_inuse *inuse, *n;
@@ -929,6 +960,7 @@ static inline void xen_netbk_tx_action_dealloc(struct xen_netbk *netbk)
list_del_init(&inuse->list);
}
}
+#endif
static void netbk_tx_err(struct xenvif *vif,
struct xen_netif_tx_request *txp, RING_IDX end)
@@ -984,15 +1016,37 @@ static int netbk_count_requests(struct xenvif *vif,
return -frags;
}
} while ((txp++)->flags & XEN_NETTXF_more_data);
-
return frags;
}
+#ifndef HAVE_XEN_PAGE_FOREIGN
+static struct page *xen_netbk_alloc_page(struct xen_netbk *netbk,
+ struct sk_buff *skb,
+ unsigned long pending_idx)
+{
+ struct page *page;
+ page = alloc_page(GFP_KERNEL|__GFP_COLD);
+ if (!page)
+ return NULL;
+ set_page_ext(page, netbk, pending_idx);
+ netbk->mmap_pages[pending_idx] = page;
+ return page;
+}
+#endif
+
+#ifdef HAVE_XEN_PAGE_FOREIGN
static struct gnttab_map_grant_ref *xen_netbk_get_requests(struct xen_netbk *netbk,
struct xenvif *vif,
struct sk_buff *skb,
struct xen_netif_tx_request *txp,
- struct gnttab_map_grant_ref *mop)
+ struct gnttab_map_grant_ref *gop)
+#else
+static struct gnttab_copy *xen_netbk_get_requests(struct xen_netbk *netbk,
+ struct xenvif *vif,
+ struct sk_buff *skb,
+ struct xen_netif_tx_request *txp,
+ struct gnttab_copy *gop)
+#endif
{
struct skb_shared_info *shinfo = skb_shinfo(skb);
skb_frag_t *frags = shinfo->frags;
@@ -1003,16 +1057,39 @@ static struct gnttab_map_grant_ref *xen_netbk_get_requests(struct xen_netbk *net
start = ((unsigned long)shinfo->frags[0].page == pending_idx);
for (i = start; i < shinfo->nr_frags; i++, txp++) {
+#ifndef HAVE_XEN_PAGE_FOREIGN
+ struct page *page;
+#endif
pending_ring_idx_t index;
struct pending_tx_info *pending_tx_info =
netbk->pending_tx_info;
index = pending_index(netbk->pending_cons++);
pending_idx = netbk->pending_ring[index];
-
- gnttab_set_map_op(mop++, idx_to_kaddr(netbk, pending_idx),
+#ifdef HAVE_XEN_PAGE_FOREIGN
+ gnttab_set_map_op(gop++, idx_to_kaddr(netbk, pending_idx),
GNTMAP_host_map | GNTMAP_readonly,
txp->gref, vif->domid);
+#else
+ page = xen_netbk_alloc_page(netbk, skb, pending_idx);
+ if (!page)
+ return NULL;
+
+ netbk->mmap_pages[pending_idx] = page;
+
+ gop->source.u.ref = txp->gref;
+ gop->source.domid = vif->domid;
+ gop->source.offset = txp->offset;
+
+ gop->dest.u.gmfn = virt_to_mfn(page_address(page));
+ gop->dest.domid = DOMID_SELF;
+ gop->dest.offset = txp->offset;
+
+ gop->len = txp->size;
+ gop->flags = GNTCOPY_source_gref;
+
+ gop++;
+#endif
memcpy(&pending_tx_info[pending_idx].req, txp, sizeof(*txp));
xenvif_get(vif);
@@ -1020,14 +1097,24 @@ static struct gnttab_map_grant_ref *xen_netbk_get_requests(struct xen_netbk *net
frags[i].page = (void *)pending_idx;
}
- return mop;
+ return gop;
}
-static int xen_netbk_tx_check_mop(struct xen_netbk *netbk,
+#ifdef HAVE_XEN_PAGE_FOREIGN
+static int xen_netbk_tx_check_gop(struct xen_netbk *netbk,
struct sk_buff *skb,
- struct gnttab_map_grant_ref **mopp)
+ struct gnttab_map_grant_ref **gopp)
+#else
+static int xen_netbk_tx_check_gop(struct xen_netbk *netbk,
+ struct sk_buff *skb,
+ struct gnttab_copy **gopp)
+#endif
{
- struct gnttab_map_grant_ref *mop = *mopp;
+#ifdef HAVE_XEN_PAGE_FOREIGN
+ struct gnttab_map_grant_ref *gop = *gopp;
+#else
+ struct gnttab_copy *gop = *gopp;
+#endif
int pending_idx = *((u16 *)skb->data);
struct pending_tx_info *pending_tx_info = netbk->pending_tx_info;
struct xenvif *vif = pending_tx_info[pending_idx].vif;
@@ -1037,7 +1124,7 @@ static int xen_netbk_tx_check_mop(struct xen_netbk *netbk,
int i, err, start;
/* Check status of header. */
- err = mop->status;
+ err = gop->status;
if (unlikely(err)) {
pending_ring_idx_t index;
index = pending_index(netbk->pending_prod++);
@@ -1045,11 +1132,13 @@ static int xen_netbk_tx_check_mop(struct xen_netbk *netbk,
make_tx_response(vif, txp, XEN_NETIF_RSP_ERROR);
netbk->pending_ring[index] = pending_idx;
xenvif_put(vif);
+#ifdef HAVE_XEN_PAGE_FOREIGN
} else {
set_phys_to_machine(
__pa(idx_to_kaddr(netbk, pending_idx)) >> PAGE_SHIFT,
- FOREIGN_FRAME(mop->dev_bus_addr >> PAGE_SHIFT));
- netbk->grant_tx_handle[pending_idx] = mop->handle;
+ FOREIGN_FRAME(gop->dev_bus_addr >> PAGE_SHIFT));
+ netbk->grant_tx_handle[pending_idx] = gop->handle;
+#endif
}
/* Skip first skb fragment if it is on same page as header fragment. */
@@ -1062,14 +1151,16 @@ static int xen_netbk_tx_check_mop(struct xen_netbk *netbk,
pending_idx = (unsigned long)shinfo->frags[i].page;
/* Check error status: if okay then remember grant handle. */
- newerr = (++mop)->status;
+ newerr = (++gop)->status;
if (likely(!newerr)) {
+#ifdef HAVE_XEN_PAGE_FOREIGN
unsigned long addr;
addr = idx_to_kaddr(netbk, pending_idx);
set_phys_to_machine(
__pa(addr)>>PAGE_SHIFT,
- FOREIGN_FRAME(mop->dev_bus_addr>>PAGE_SHIFT));
- netbk->grant_tx_handle[pending_idx] = mop->handle;
+ FOREIGN_FRAME(gop->dev_bus_addr>>PAGE_SHIFT));
+ netbk->grant_tx_handle[pending_idx] = gop->handle;
+#endif
/* Had a previous error? Invalidate this fragment. */
if (unlikely(err))
xen_netbk_idx_release(netbk, pending_idx);
@@ -1099,7 +1190,7 @@ static int xen_netbk_tx_check_mop(struct xen_netbk *netbk,
err = newerr;
}
- *mopp = mop + 1;
+ *gopp = gop + 1;
return err;
}
@@ -1115,10 +1206,11 @@ static void xen_netbk_fill_frags(struct xen_netbk *netbk, struct sk_buff *skb)
unsigned long pending_idx;
pending_idx = (unsigned long)frag->page;
-
+#ifdef HAVE_XEN_PAGE_FOREIGN
netbk->pending_inuse[pending_idx].alloc_time = jiffies;
list_add_tail(&netbk->pending_inuse[pending_idx].list,
&netbk->pending_inuse_head);
+#endif
txp = &netbk->pending_tx_info[pending_idx].req;
frag->page = virt_to_page(idx_to_kaddr(netbk, pending_idx));
@@ -1128,6 +1220,10 @@ static void xen_netbk_fill_frags(struct xen_netbk *netbk, struct sk_buff *skb)
skb->len += txp->size;
skb->data_len += txp->size;
skb->truesize += txp->size;
+
+ /* Take an extra reference to offset xen_netbk_idx_release */
+ get_page(netbk->mmap_pages[pending_idx]);
+ xen_netbk_idx_release(netbk, pending_idx);
}
}
@@ -1286,18 +1382,24 @@ static bool tx_credit_exceeded(struct xenvif *vif, unsigned size)
return false;
}
-static unsigned xen_netbk_tx_build_mops(struct xen_netbk *netbk)
+static unsigned xen_netbk_tx_build_gops(struct xen_netbk *netbk)
{
- struct gnttab_map_grant_ref *mop;
+#ifdef HAVE_XEN_PAGE_FOREIGN
+ struct gnttab_map_grant_ref *gop = netbk->tx_map_ops, *request_gop;
+#else
+ struct gnttab_copy *gop = netbk->tx_copy_ops, *request_gop;
+#endif
struct sk_buff *skb;
int ret;
- mop = netbk->tx_map_ops;
while (((nr_pending_reqs(netbk) + MAX_SKB_FRAGS) < MAX_PENDING_REQS) &&
!list_empty(&netbk->net_schedule_list)) {
struct xenvif *vif;
struct xen_netif_tx_request txreq;
struct xen_netif_tx_request txfrags[MAX_SKB_FRAGS];
+#ifndef HAVE_XEN_PAGE_FOREIGN
+ struct page *page;
+#endif
struct xen_netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX-1];
u16 pending_idx;
RING_IDX idx;
@@ -1394,10 +1496,35 @@ static unsigned xen_netbk_tx_build_mops(struct xen_netbk *netbk)
}
}
- gnttab_set_map_op(mop, idx_to_kaddr(netbk, pending_idx),
+#ifdef HAVE_XEN_PAGE_FOREIGN
+ gnttab_set_map_op(gop, idx_to_kaddr(netbk, pending_idx),
GNTMAP_host_map | GNTMAP_readonly,
txreq.gref, vif->domid);
- mop++;
+ gop++;
+#else
+ /* XXX could copy straight to head */
+ page = xen_netbk_alloc_page(netbk, skb, pending_idx);
+ if (!page) {
+ kfree_skb(skb);
+ netbk_tx_err(vif, &txreq, idx);
+ continue;
+ }
+
+ netbk->mmap_pages[pending_idx] = page;
+
+ gop->source.u.ref = txreq.gref;
+ gop->source.domid = vif->domid;
+ gop->source.offset = txreq.offset;
+
+ gop->dest.u.gmfn = virt_to_mfn(page_address(page));
+ gop->dest.domid = DOMID_SELF;
+ gop->dest.offset = txreq.offset;
+
+ gop->len = txreq.size;
+ gop->flags = GNTCOPY_source_gref;
+
+ gop++;
+#endif
memcpy(&netbk->pending_tx_info[pending_idx].req,
&txreq, sizeof(txreq));
@@ -1420,24 +1547,43 @@ static unsigned xen_netbk_tx_build_mops(struct xen_netbk *netbk)
netbk->pending_cons++;
- mop = xen_netbk_get_requests(netbk, vif, skb, txfrags, mop);
+ request_gop = xen_netbk_get_requests(netbk, vif,
+ skb, txfrags, gop);
+ if (request_gop == NULL) {
+ kfree_skb(skb);
+ netbk_tx_err(vif, &txreq, idx);
+ continue;
+ }
+ gop = request_gop;
vif->tx.req_cons = idx;
xenvif_schedule_work(vif);
- if ((mop - netbk->tx_map_ops) >= ARRAY_SIZE(netbk->tx_map_ops))
+#ifdef HAVE_XEN_PAGE_FOREIGN
+ if ((gop - netbk->tx_map_ops) >= ARRAY_SIZE(netbk->tx_map_ops))
break;
+#else
+ if ((gop-netbk->tx_copy_ops) >= ARRAY_SIZE(netbk->tx_copy_ops))
+ break;
+#endif
}
- return mop - netbk->tx_map_ops;
+#ifdef HAVE_XEN_PAGE_FOREIGN
+ return gop - netbk->tx_map_ops;
+#else
+ return gop - netbk->tx_copy_ops;
+#endif
}
static void xen_netbk_tx_submit(struct xen_netbk *netbk)
{
- struct gnttab_map_grant_ref *mop;
+#ifdef HAVE_XEN_PAGE_FOREIGN
+ struct gnttab_map_grant_ref *gop = netbk->tx_map_ops;
+#else
+ struct gnttab_copy *gop = netbk->tx_copy_ops;
+#endif
struct sk_buff *skb;
- mop = netbk->tx_map_ops;
while ((skb = __skb_dequeue(&netbk->tx_queue)) != NULL) {
struct xen_netif_tx_request *txp;
struct xenvif *vif;
@@ -1449,7 +1595,7 @@ static void xen_netbk_tx_submit(struct xen_netbk *netbk)
txp = &netbk->pending_tx_info[pending_idx].req;
/* Check the remap error code. */
- if (unlikely(xen_netbk_tx_check_mop(netbk, skb, &mop))) {
+ if (unlikely(xen_netbk_tx_check_gop(netbk, skb, &gop))) {
pr_debug("netback grant failed.\n");
skb_shinfo(skb)->nr_frags = 0;
kfree_skb(skb);
@@ -1495,12 +1641,14 @@ static void xen_netbk_tx_submit(struct xen_netbk *netbk)
continue;
}
+#ifdef HAVE_XEN_PAGE_FOREIGN
if (unlikely(netbk_copy_skb_mode == NETBK_ALWAYS_COPY_SKB) &&
unlikely(skb_linearize(skb))) {
pr_debug("Can't linearize skb in net_tx_action.\n");
kfree_skb(skb);
continue;
}
+#endif
vif->stats.rx_bytes += skb->len;
vif->stats.rx_packets++;
@@ -1513,21 +1661,31 @@ static void xen_netbk_tx_submit(struct xen_netbk *netbk)
/* Called after netfront has transmitted */
static void xen_netbk_tx_action(struct xen_netbk *netbk)
{
- unsigned nr_mops;
+ unsigned nr_gops;
int ret;
+#ifdef HAVE_XEN_PAGE_FOREIGN
xen_netbk_tx_action_dealloc(netbk);
+#endif
- nr_mops = xen_netbk_tx_build_mops(netbk);
+ nr_gops = xen_netbk_tx_build_gops(netbk);
- if (nr_mops == 0)
+#ifdef HAVE_XEN_PAGE_FOREIGN
+ if (nr_gops == 0)
goto out;
-
ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref,
- netbk->tx_map_ops, nr_mops);
+ netbk->tx_map_ops, nr_gops);
+#else
+ if (nr_gops == 0)
+ return;
+ ret = HYPERVISOR_grant_table_op(GNTTABOP_copy,
+ netbk->tx_copy_ops, nr_gops);
+#endif
BUG_ON(ret);
xen_netbk_tx_submit(netbk);
+
+#ifdef HAVE_XEN_PAGE_FOREIGN
out:
if (netbk_copy_skb_mode == NETBK_DELAYED_COPY_SKB &&
!list_empty(&netbk->pending_inuse_head)) {
@@ -1538,8 +1696,10 @@ out:
mod_timer(&netbk->netbk_tx_pending_timer,
oldest->alloc_time + HZ);
}
+#endif
}
+#ifdef HAVE_XEN_PAGE_FOREIGN
static void xen_netbk_idx_release(struct xen_netbk *netbk, u16 pending_idx)
{
static DEFINE_SPINLOCK(_lock);
@@ -1567,6 +1727,33 @@ static void netif_page_release(struct page *page, unsigned int order)
xen_netbk_idx_release(&xen_netbk[group], idx);
}
+#else
+static void xen_netbk_idx_release(struct xen_netbk *netbk, u16 pending_idx)
+{
+ struct xenvif *vif;
+ struct pending_tx_info *pending_tx_info;
+ pending_ring_idx_t index;
+
+ /* Already complete? */
+ if (netbk->mmap_pages[pending_idx] == NULL)
+ return;
+
+ pending_tx_info = &netbk->pending_tx_info[pending_idx];
+
+ vif = pending_tx_info->vif;
+
+ make_tx_response(vif, &pending_tx_info->req, XEN_NETIF_RSP_OKAY);
+
+ index = pending_index(netbk->pending_prod++);
+ netbk->pending_ring[index] = pending_idx;
+
+ xenvif_put(vif);
+
+ netbk->mmap_pages[pending_idx]->mapping = 0;
+ put_page(netbk->mmap_pages[pending_idx]);
+ netbk->mmap_pages[pending_idx] = NULL;
+}
+#endif
static void make_tx_response(struct xenvif *vif,
struct xen_netif_tx_request *txp,
@@ -1619,12 +1806,14 @@ static inline int rx_work_todo(struct xen_netbk *netbk)
static inline int tx_work_todo(struct xen_netbk *netbk)
{
+#ifdef HAVE_XEN_PAGE_FOREIGN
if (netbk->dealloc_cons != netbk->dealloc_prod)
return 1;
if (netbk_copy_skb_mode == NETBK_DELAYED_COPY_SKB &&
!list_empty(&netbk->pending_inuse_head))
return 1;
+#endif
if (((nr_pending_reqs(netbk) + MAX_SKB_FRAGS) < MAX_PENDING_REQS) &&
!list_empty(&netbk->net_schedule_list))
@@ -1659,7 +1848,9 @@ static int xen_netbk_kthread(void *data)
static int __init netback_init(void)
{
int i;
+#ifdef HAVE_XEN_PAGE_FOREIGN
struct page *page;
+#endif
int rc = 0;
int group;
@@ -1683,11 +1874,14 @@ static int __init netback_init(void)
netbk->net_timer.data = (unsigned long)netbk;
netbk->net_timer.function = xen_netbk_alarm;
+#ifdef HAVE_XEN_PAGE_FOREIGN
init_timer(&netbk->netbk_tx_pending_timer);
netbk->netbk_tx_pending_timer.data = (unsigned long)netbk;
netbk->netbk_tx_pending_timer.function =
xen_netbk_tx_pending_timeout;
+#endif
+#ifdef HAVE_XEN_PAGE_FOREIGN
netbk->mmap_pages =
alloc_empty_pages_and_pagevec(MAX_PENDING_REQS);
if (!netbk->mmap_pages) {
@@ -1701,9 +1895,10 @@ static int __init netback_init(void)
for (i = 0; i < MAX_PENDING_REQS; i++) {
page = netbk->mmap_pages[i];
SetPageForeign(page, netif_page_release);
- set_page_ext(page, group, i);
+ set_page_ext(page, netbk, i);
INIT_LIST_HEAD(&netbk->pending_inuse[i].list);
}
+#endif
netbk->pending_cons = 0;
netbk->pending_prod = MAX_PENDING_REQS;
@@ -1717,9 +1912,11 @@ static int __init netback_init(void)
if (IS_ERR(netbk->task)) {
printk(KERN_ALERT "kthread_run() fails at netback\n");
+#ifdef HAVE_XEN_PAGE_FOREIGN
free_empty_pages_and_pagevec(netbk->mmap_pages,
MAX_PENDING_REQS);
del_timer(&netbk->netbk_tx_pending_timer);
+#endif
del_timer(&netbk->net_timer);
rc = PTR_ERR(netbk->task);
goto failed_init;
@@ -1727,7 +1924,9 @@ static int __init netback_init(void)
kthread_bind(netbk->task, group);
+#ifdef HAVE_XEN_PAGE_FOREIGN
INIT_LIST_HEAD(&netbk->pending_inuse_head);
+#endif
INIT_LIST_HEAD(&netbk->net_schedule_list);
spin_lock_init(&netbk->net_schedule_list_lock);
@@ -1737,6 +1936,7 @@ static int __init netback_init(void)
wake_up_process(netbk->task);
}
+#ifdef HAVE_XEN_PAGE_FOREIGN
netbk_copy_skb_mode = NETBK_DONT_COPY_SKB;
if (MODPARM_copy_skb) {
if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_and_replace,
@@ -1745,6 +1945,7 @@ static int __init netback_init(void)
else
netbk_copy_skb_mode = NETBK_DELAYED_COPY_SKB;
}
+#endif
rc = xenvif_xenbus_init();
if (rc)
@@ -1755,9 +1956,17 @@ static int __init netback_init(void)
failed_init:
for (i = 0; i < group; i++) {
struct xen_netbk *netbk = &xen_netbk[i];
+#ifdef HAVE_XEN_PAGE_FOREIGN
free_empty_pages_and_pagevec(netbk->mmap_pages,
MAX_PENDING_REQS);
del_timer(&netbk->netbk_tx_pending_timer);
+#else
+ int j;
+ for (j = 0; j < MAX_PENDING_REQS; j++) {
+ if (netbk->mmap_pages[j])
+ __free_page(netbk->mmap_pages[j]);
+ }
+#endif
del_timer(&netbk->net_timer);
kthread_stop(netbk->task);
}
diff --git a/drivers/net/xen-netback/xenbus.c b/drivers/net/xen-netback/xenbus.c
index e854420..d56b1ee 100644
--- a/drivers/net/xen-netback/xenbus.c
+++ b/drivers/net/xen-netback/xenbus.c
@@ -74,8 +74,10 @@ static int netback_probe(struct xenbus_device *dev,
dev_set_drvdata(&dev->dev, be);
sg = 1;
+#ifdef HAVE_XEN_PAGE_FOREIGN
if (netbk_copy_skb_mode == NETBK_ALWAYS_COPY_SKB)
sg = 0;
+#endif
do {
err = xenbus_transaction_start(&xbt);
--
1.7.4
From b8e78c8e67316b212a19075ace0271e12e1b596f Mon Sep 17 00:00:00 2001
From: Ian Campbell <ian.campbell@citrix.com>
Date: Wed, 19 Jan 2011 09:43:44 +0000
Subject: [PATCH 172/197] xen: netback: completely drop foreign page support
Generated by running:
for i in drivers/net/xen-netback/*.[ch] ; do
	echo $i
	./scripts/unifdef -B -UHAVE_XEN_PAGE_FOREIGN $i > $i.unifdef
	mv $i.unifdef $i
done
Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
---
drivers/net/xen-netback/common.h | 13 --
drivers/net/xen-netback/interface.c | 6 -
drivers/net/xen-netback/netback.c | 376 -----------------------------------
drivers/net/xen-netback/xenbus.c | 4 -
4 files changed, 0 insertions(+), 399 deletions(-)
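For reference, the effect of that unifdef invocation on a gated fragment
(a made-up example, not lifted from the driver): -UHAVE_XEN_PAGE_FOREIGN
treats the symbol as undefined, so only the #else branch survives and the
directives are dropped; -B compresses the blank lines left around the
removed section.

/* before */
#ifdef HAVE_XEN_PAGE_FOREIGN
	gop = netbk->tx_map_ops;	/* mapping path */
#else
	gop = netbk->tx_copy_ops;	/* copying fallback */
#endif

/* after: unifdef -B -UHAVE_XEN_PAGE_FOREIGN */
	gop = netbk->tx_copy_ops;	/* copying fallback */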
diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h
index 8890825..03196ab 100644
--- a/drivers/net/xen-netback/common.h
+++ b/drivers/net/xen-netback/common.h
@@ -95,9 +95,6 @@ struct xenvif {
struct timer_list credit_timeout;
/* Statistics */
-#ifdef HAVE_XEN_PAGE_FOREIGN
- int nr_copied_skbs;
-#endif
int rx_gso_checksum_fixup;
/* Miscellaneous private stuff. */
@@ -111,16 +108,6 @@ struct xenvif {
wait_queue_head_t waiting_to_free;
};
-#ifdef HAVE_XEN_PAGE_FOREIGN
-enum {
- NETBK_DONT_COPY_SKB,
- NETBK_DELAYED_COPY_SKB,
- NETBK_ALWAYS_COPY_SKB,
-};
-
-extern int netbk_copy_skb_mode;
-#endif
-
#define XEN_NETIF_TX_RING_SIZE __RING_SIZE((struct xen_netif_tx_sring *)0, PAGE_SIZE)
#define XEN_NETIF_RX_RING_SIZE __RING_SIZE((struct xen_netif_rx_sring *)0, PAGE_SIZE)
diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c
index 895b50b..7299086 100644
--- a/drivers/net/xen-netback/interface.c
+++ b/drivers/net/xen-netback/interface.c
@@ -365,12 +365,6 @@ static const struct xenvif_stat {
char name[ETH_GSTRING_LEN];
u16 offset;
} xenvif_stats[] = {
-#ifdef HAVE_XEN_PAGE_FOREIGN
- {
- "copied_skbs",
- offsetof(struct xenvif, nr_copied_skbs)
- },
-#endif
{
"rx_gso_checksum_fixup",
offsetof(struct xenvif, rx_gso_checksum_fixup)
diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c
index 60829d4..1fbcd1c 100644
--- a/drivers/net/xen-netback/netback.c
+++ b/drivers/net/xen-netback/netback.c
@@ -40,9 +40,6 @@
#include <net/tcp.h>
-#ifdef HAVE_XEN_PAGE_FOREIGN
-#include <xen/balloon.h>
-#endif
#include <xen/events.h>
#include <xen/interface/memory.h>
@@ -61,13 +58,6 @@ struct netbk_rx_meta {
int gso_size;
};
-#ifdef HAVE_XEN_PAGE_FOREIGN
-struct netbk_tx_pending_inuse {
- struct list_head list;
- unsigned long alloc_time;
-};
-#endif
-
#define MAX_PENDING_REQS 256
#define MAX_BUFFER_OFFSET PAGE_SIZE
@@ -95,24 +85,11 @@ struct xen_netbk {
struct sk_buff_head tx_queue;
struct timer_list net_timer;
-#ifdef HAVE_XEN_PAGE_FOREIGN
- struct timer_list netbk_tx_pending_timer;
-#endif
-#ifdef HAVE_XEN_PAGE_FOREIGN
- struct page **mmap_pages;
-#else
struct page *mmap_pages[MAX_PENDING_REQS];
-#endif
pending_ring_idx_t pending_prod;
pending_ring_idx_t pending_cons;
-#ifdef HAVE_XEN_PAGE_FOREIGN
- pending_ring_idx_t dealloc_prod;
- pending_ring_idx_t dealloc_cons;
-
- struct list_head pending_inuse_head;
-#endif
struct list_head net_schedule_list;
/* Protect the net_schedule_list in netif. */
@@ -121,20 +98,9 @@ struct xen_netbk {
atomic_t netfront_count;
struct pending_tx_info pending_tx_info[MAX_PENDING_REQS];
-#ifdef HAVE_XEN_PAGE_FOREIGN
- struct netbk_tx_pending_inuse pending_inuse[MAX_PENDING_REQS];
- struct gnttab_unmap_grant_ref tx_unmap_ops[MAX_PENDING_REQS];
- struct gnttab_map_grant_ref tx_map_ops[MAX_PENDING_REQS];
-
- grant_handle_t grant_tx_handle[MAX_PENDING_REQS];
-#else
struct gnttab_copy tx_copy_ops[MAX_PENDING_REQS];
-#endif
u16 pending_ring[MAX_PENDING_REQS];
-#ifdef HAVE_XEN_PAGE_FOREIGN
- u16 dealloc_ring[MAX_PENDING_REQS];
-#endif
/*
* Each head or fragment can be up to 4096 bytes. Given
@@ -220,11 +186,6 @@ static int get_page_ext(struct page *pg,
struct xen_netbk *netbk;
unsigned int group, idx;
-#ifdef HAVE_XEN_PAGE_FOREIGN
- if (!PageForeign(pg))
- return 0;
-#endif
-
group = ext.e.group - 1;
if (group < 0 || group >= xen_netbk_group_nr)
@@ -232,11 +193,6 @@ static int get_page_ext(struct page *pg,
netbk = &xen_netbk[group];
-#ifdef HAVE_XEN_PAGE_FOREIGN
- if (netbk->mmap_pages == NULL)
- return 0;
-#endif
-
idx = ext.e.idx;
if ((idx < 0) || (idx >= MAX_PENDING_REQS))
@@ -272,15 +228,6 @@ static inline pending_ring_idx_t nr_pending_reqs(struct xen_netbk *netbk)
netbk->pending_prod + netbk->pending_cons;
}
-#ifdef HAVE_XEN_PAGE_FOREIGN
-/* Setting this allows the safe use of this driver without netloop. */
-static int MODPARM_copy_skb = 1;
-module_param_named(copy_skb, MODPARM_copy_skb, bool, 0);
-MODULE_PARM_DESC(copy_skb, "Copy data received from netfront without netloop");
-
-int netbk_copy_skb_mode;
-#endif
-
static void xen_netbk_kick_thread(struct xen_netbk *netbk)
{
wake_up(&netbk->wq);
@@ -738,14 +685,6 @@ void xen_netbk_queue_tx_skb(struct xenvif *vif, struct sk_buff *skb)
xen_netbk_kick_thread(netbk);
}
-#ifdef HAVE_XEN_PAGE_FOREIGN
-static void xen_netbk_tx_pending_timeout(unsigned long data)
-{
- struct xen_netbk *netbk = (struct xen_netbk *)data;
- xen_netbk_kick_thread(netbk);
-}
-#endif
-
static void xen_netbk_alarm(unsigned long data)
{
struct xen_netbk *netbk = (struct xen_netbk *)data;
@@ -845,123 +784,6 @@ static void tx_credit_callback(unsigned long data)
xenvif_schedule_work(vif);
}
-#ifdef HAVE_XEN_PAGE_FOREIGN
-static inline int copy_pending_req(struct xen_netbk *netbk,
- pending_ring_idx_t pending_idx)
-{
- return gnttab_copy_grant_page(
- netbk->grant_tx_handle[pending_idx],
- &netbk->mmap_pages[pending_idx]);
-}
-#endif
-
-#ifdef HAVE_XEN_PAGE_FOREIGN
-static inline void xen_netbk_tx_action_dealloc(struct xen_netbk *netbk)
-{
- struct netbk_tx_pending_inuse *inuse, *n;
- struct gnttab_unmap_grant_ref *gop;
- u16 pending_idx;
- pending_ring_idx_t dc, dp;
- struct xenvif *vif;
- int ret;
- LIST_HEAD(list);
-
- dc = netbk->dealloc_cons;
- gop = netbk->tx_unmap_ops;
-
- /* Free up any grants we have finished using. */
- do {
- dp = netbk->dealloc_prod;
-
- /* Ensure we see all indices enqueued by xen_netbk_idx_release(). */
- smp_rmb();
-
- while (dc != dp) {
- unsigned long pfn;
- struct netbk_tx_pending_inuse *pending_inuse =
- netbk->pending_inuse;
-
- pending_idx = netbk->dealloc_ring[pending_index(dc++)];
- list_move_tail(&pending_inuse[pending_idx].list, &list);
-
- pfn = idx_to_pfn(netbk, pending_idx);
- /* Already unmapped? */
- if (!phys_to_machine_mapping_valid(pfn))
- continue;
-
- gnttab_set_unmap_op(gop,
- idx_to_kaddr(netbk, pending_idx),
- GNTMAP_host_map,
- netbk->grant_tx_handle[pending_idx]);
- gop++;
- }
-
- } while (dp != netbk->dealloc_prod);
-
- netbk->dealloc_cons = dc;
-
- ret = HYPERVISOR_grant_table_op(
- GNTTABOP_unmap_grant_ref, netbk->tx_unmap_ops,
- gop - netbk->tx_unmap_ops);
- BUG_ON(ret);
-
- /*
- * Copy any entries that have been pending for too long
- */
- if (netbk_copy_skb_mode == NETBK_DELAYED_COPY_SKB &&
- !list_empty(&netbk->pending_inuse_head)) {
- list_for_each_entry_safe(inuse, n,
- &netbk->pending_inuse_head, list) {
- struct pending_tx_info *pending_tx_info;
- pending_tx_info = netbk->pending_tx_info;
-
- if (time_after(inuse->alloc_time + HZ / 2, jiffies))
- break;
-
- pending_idx = inuse - netbk->pending_inuse;
-
- pending_tx_info[pending_idx].vif->nr_copied_skbs++;
-
- switch (copy_pending_req(netbk, pending_idx)) {
- case 0:
- list_move_tail(&inuse->list, &list);
- continue;
- case -EBUSY:
- list_del_init(&inuse->list);
- continue;
- case -ENOENT:
- continue;
- }
-
- break;
- }
- }
-
- list_for_each_entry_safe(inuse, n, &list, list) {
- struct pending_tx_info *pending_tx_info;
- pending_ring_idx_t index;
-
- pending_tx_info = netbk->pending_tx_info;
- pending_idx = inuse - netbk->pending_inuse;
-
- vif = pending_tx_info[pending_idx].vif;
-
- make_tx_response(vif, &pending_tx_info[pending_idx].req,
- XEN_NETIF_RSP_OKAY);
-
- /* Ready for next use. */
- gnttab_reset_grant_page(netbk->mmap_pages[pending_idx]);
-
- index = pending_index(netbk->pending_prod++);
- netbk->pending_ring[index] = pending_idx;
-
- xenvif_put(vif);
-
- list_del_init(&inuse->list);
- }
-}
-#endif
-
static void netbk_tx_err(struct xenvif *vif,
struct xen_netif_tx_request *txp, RING_IDX end)
{
@@ -1019,7 +841,6 @@ static int netbk_count_requests(struct xenvif *vif,
return frags;
}
-#ifndef HAVE_XEN_PAGE_FOREIGN
static struct page *xen_netbk_alloc_page(struct xen_netbk *netbk,
struct sk_buff *skb,
unsigned long pending_idx)
@@ -1032,21 +853,12 @@ static struct page *xen_netbk_alloc_page(struct xen_netbk *netbk,
netbk->mmap_pages[pending_idx] = page;
return page;
}
-#endif
-#ifdef HAVE_XEN_PAGE_FOREIGN
-static struct gnttab_map_grant_ref *xen_netbk_get_requests(struct xen_netbk *netbk,
- struct xenvif *vif,
- struct sk_buff *skb,
- struct xen_netif_tx_request *txp,
- struct gnttab_map_grant_ref *gop)
-#else
static struct gnttab_copy *xen_netbk_get_requests(struct xen_netbk *netbk,
struct xenvif *vif,
struct sk_buff *skb,
struct xen_netif_tx_request *txp,
struct gnttab_copy *gop)
-#endif
{
struct skb_shared_info *shinfo = skb_shinfo(skb);
skb_frag_t *frags = shinfo->frags;
@@ -1057,20 +869,13 @@ static struct gnttab_copy *xen_netbk_get_requests(struct xen_netbk *netbk,
start = ((unsigned long)shinfo->frags[0].page == pending_idx);
for (i = start; i < shinfo->nr_frags; i++, txp++) {
-#ifndef HAVE_XEN_PAGE_FOREIGN
struct page *page;
-#endif
pending_ring_idx_t index;
struct pending_tx_info *pending_tx_info =
netbk->pending_tx_info;
index = pending_index(netbk->pending_cons++);
pending_idx = netbk->pending_ring[index];
-#ifdef HAVE_XEN_PAGE_FOREIGN
- gnttab_set_map_op(gop++, idx_to_kaddr(netbk, pending_idx),
- GNTMAP_host_map | GNTMAP_readonly,
- txp->gref, vif->domid);
-#else
page = xen_netbk_alloc_page(netbk, skb, pending_idx);
if (!page)
return NULL;
@@ -1089,7 +894,6 @@ static struct gnttab_copy *xen_netbk_get_requests(struct xen_netbk *netbk,
gop->flags = GNTCOPY_source_gref;
gop++;
-#endif
memcpy(&pending_tx_info[pending_idx].req, txp, sizeof(*txp));
xenvif_get(vif);
@@ -1100,21 +904,11 @@ static struct gnttab_copy *xen_netbk_get_requests(struct xen_netbk *netbk,
return gop;
}
-#ifdef HAVE_XEN_PAGE_FOREIGN
-static int xen_netbk_tx_check_gop(struct xen_netbk *netbk,
- struct sk_buff *skb,
- struct gnttab_map_grant_ref **gopp)
-#else
static int xen_netbk_tx_check_gop(struct xen_netbk *netbk,
struct sk_buff *skb,
struct gnttab_copy **gopp)
-#endif
{
-#ifdef HAVE_XEN_PAGE_FOREIGN
- struct gnttab_map_grant_ref *gop = *gopp;
-#else
struct gnttab_copy *gop = *gopp;
-#endif
int pending_idx = *((u16 *)skb->data);
struct pending_tx_info *pending_tx_info = netbk->pending_tx_info;
struct xenvif *vif = pending_tx_info[pending_idx].vif;
@@ -1132,13 +926,6 @@ static int xen_netbk_tx_check_gop(struct xen_netbk *netbk,
make_tx_response(vif, txp, XEN_NETIF_RSP_ERROR);
netbk->pending_ring[index] = pending_idx;
xenvif_put(vif);
-#ifdef HAVE_XEN_PAGE_FOREIGN
- } else {
- set_phys_to_machine(
- __pa(idx_to_kaddr(netbk, pending_idx)) >> PAGE_SHIFT,
- FOREIGN_FRAME(gop->dev_bus_addr >> PAGE_SHIFT));
- netbk->grant_tx_handle[pending_idx] = gop->handle;
-#endif
}
/* Skip first skb fragment if it is on same page as header fragment. */
@@ -1153,14 +940,6 @@ static int xen_netbk_tx_check_gop(struct xen_netbk *netbk,
/* Check error status: if okay then remember grant handle. */
newerr = (++gop)->status;
if (likely(!newerr)) {
-#ifdef HAVE_XEN_PAGE_FOREIGN
- unsigned long addr;
- addr = idx_to_kaddr(netbk, pending_idx);
- set_phys_to_machine(
- __pa(addr)>>PAGE_SHIFT,
- FOREIGN_FRAME(gop->dev_bus_addr>>PAGE_SHIFT));
- netbk->grant_tx_handle[pending_idx] = gop->handle;
-#endif
/* Had a previous error? Invalidate this fragment. */
if (unlikely(err))
xen_netbk_idx_release(netbk, pending_idx);
@@ -1206,11 +985,6 @@ static void xen_netbk_fill_frags(struct xen_netbk *netbk, struct sk_buff *skb)
unsigned long pending_idx;
pending_idx = (unsigned long)frag->page;
-#ifdef HAVE_XEN_PAGE_FOREIGN
- netbk->pending_inuse[pending_idx].alloc_time = jiffies;
- list_add_tail(&netbk->pending_inuse[pending_idx].list,
- &netbk->pending_inuse_head);
-#endif
txp = &netbk->pending_tx_info[pending_idx].req;
frag->page = virt_to_page(idx_to_kaddr(netbk, pending_idx));
@@ -1384,11 +1158,7 @@ static bool tx_credit_exceeded(struct xenvif *vif, unsigned size)
static unsigned xen_netbk_tx_build_gops(struct xen_netbk *netbk)
{
-#ifdef HAVE_XEN_PAGE_FOREIGN
- struct gnttab_map_grant_ref *gop = netbk->tx_map_ops, *request_gop;
-#else
struct gnttab_copy *gop = netbk->tx_copy_ops, *request_gop;
-#endif
struct sk_buff *skb;
int ret;
@@ -1397,9 +1167,7 @@ static unsigned xen_netbk_tx_build_gops(struct xen_netbk *netbk)
struct xenvif *vif;
struct xen_netif_tx_request txreq;
struct xen_netif_tx_request txfrags[MAX_SKB_FRAGS];
-#ifndef HAVE_XEN_PAGE_FOREIGN
struct page *page;
-#endif
struct xen_netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX-1];
u16 pending_idx;
RING_IDX idx;
@@ -1496,12 +1264,6 @@ static unsigned xen_netbk_tx_build_gops(struct xen_netbk *netbk)
}
}
-#ifdef HAVE_XEN_PAGE_FOREIGN
- gnttab_set_map_op(gop, idx_to_kaddr(netbk, pending_idx),
- GNTMAP_host_map | GNTMAP_readonly,
- txreq.gref, vif->domid);
- gop++;
-#else
/* XXX could copy straight to head */
page = xen_netbk_alloc_page(netbk, skb, pending_idx);
if (!page) {
@@ -1524,7 +1286,6 @@ static unsigned xen_netbk_tx_build_gops(struct xen_netbk *netbk)
gop->flags = GNTCOPY_source_gref;
gop++;
-#endif
memcpy(&netbk->pending_tx_info[pending_idx].req,
&txreq, sizeof(txreq));
@@ -1559,29 +1320,16 @@ static unsigned xen_netbk_tx_build_gops(struct xen_netbk *netbk)
vif->tx.req_cons = idx;
xenvif_schedule_work(vif);
-#ifdef HAVE_XEN_PAGE_FOREIGN
- if ((gop - netbk->tx_map_ops) >= ARRAY_SIZE(netbk->tx_map_ops))
- break;
-#else
if ((gop-netbk->tx_copy_ops) >= ARRAY_SIZE(netbk->tx_copy_ops))
break;
-#endif
}
-#ifdef HAVE_XEN_PAGE_FOREIGN
- return gop - netbk->tx_map_ops;
-#else
return gop - netbk->tx_copy_ops;
-#endif
}
static void xen_netbk_tx_submit(struct xen_netbk *netbk)
{
-#ifdef HAVE_XEN_PAGE_FOREIGN
- struct gnttab_map_grant_ref *gop = netbk->tx_map_ops;
-#else
struct gnttab_copy *gop = netbk->tx_copy_ops;
-#endif
struct sk_buff *skb;
while ((skb = __skb_dequeue(&netbk->tx_queue)) != NULL) {
@@ -1641,15 +1389,6 @@ static void xen_netbk_tx_submit(struct xen_netbk *netbk)
continue;
}
-#ifdef HAVE_XEN_PAGE_FOREIGN
- if (unlikely(netbk_copy_skb_mode == NETBK_ALWAYS_COPY_SKB) &&
- unlikely(skb_linearize(skb))) {
- pr_debug("Can't linearize skb in net_tx_action.\n");
- kfree_skb(skb);
- continue;
- }
-#endif
-
vif->stats.rx_bytes += skb->len;
vif->stats.rx_packets++;
@@ -1664,70 +1403,18 @@ static void xen_netbk_tx_action(struct xen_netbk *netbk)
unsigned nr_gops;
int ret;
-#ifdef HAVE_XEN_PAGE_FOREIGN
- xen_netbk_tx_action_dealloc(netbk);
-#endif
-
nr_gops = xen_netbk_tx_build_gops(netbk);
-#ifdef HAVE_XEN_PAGE_FOREIGN
- if (nr_gops == 0)
- goto out;
- ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref,
- netbk->tx_map_ops, nr_gops);
-#else
if (nr_gops == 0)
return;
ret = HYPERVISOR_grant_table_op(GNTTABOP_copy,
netbk->tx_copy_ops, nr_gops);
-#endif
BUG_ON(ret);
xen_netbk_tx_submit(netbk);
-#ifdef HAVE_XEN_PAGE_FOREIGN
-out:
- if (netbk_copy_skb_mode == NETBK_DELAYED_COPY_SKB &&
- !list_empty(&netbk->pending_inuse_head)) {
- struct netbk_tx_pending_inuse *oldest;
-
- oldest = list_entry(netbk->pending_inuse_head.next,
- struct netbk_tx_pending_inuse, list);
- mod_timer(&netbk->netbk_tx_pending_timer,
- oldest->alloc_time + HZ);
- }
-#endif
}
-#ifdef HAVE_XEN_PAGE_FOREIGN
-static void xen_netbk_idx_release(struct xen_netbk *netbk, u16 pending_idx)
-{
- static DEFINE_SPINLOCK(_lock);
- unsigned long flags;
- pending_ring_idx_t index;
-
- spin_lock_irqsave(&_lock, flags);
- index = pending_index(netbk->dealloc_prod);
- netbk->dealloc_ring[index] = pending_idx;
- /* Sync with xen_netbk_tx_action_dealloc: insert idx /then/ incr producer. */
- smp_wmb();
- netbk->dealloc_prod++;
- spin_unlock_irqrestore(&_lock, flags);
-
- xen_netbk_kick_thread(netbk);
-}
-
-static void netif_page_release(struct page *page, unsigned int order)
-{
- unsigned int group, idx;
- int foreign = get_page_ext(page, &group, &idx);
-
- BUG_ON(!foreign);
- BUG_ON(order);
-
- xen_netbk_idx_release(&xen_netbk[group], idx);
-}
-#else
static void xen_netbk_idx_release(struct xen_netbk *netbk, u16 pending_idx)
{
struct xenvif *vif;
@@ -1753,7 +1440,6 @@ static void xen_netbk_idx_release(struct xen_netbk *netbk, u16 pending_idx)
put_page(netbk->mmap_pages[pending_idx]);
netbk->mmap_pages[pending_idx] = NULL;
}
-#endif
static void make_tx_response(struct xenvif *vif,
struct xen_netif_tx_request *txp,
@@ -1806,14 +1492,6 @@ static inline int rx_work_todo(struct xen_netbk *netbk)
static inline int tx_work_todo(struct xen_netbk *netbk)
{
-#ifdef HAVE_XEN_PAGE_FOREIGN
- if (netbk->dealloc_cons != netbk->dealloc_prod)
- return 1;
-
- if (netbk_copy_skb_mode == NETBK_DELAYED_COPY_SKB &&
- !list_empty(&netbk->pending_inuse_head))
- return 1;
-#endif
if (((nr_pending_reqs(netbk) + MAX_SKB_FRAGS) < MAX_PENDING_REQS) &&
!list_empty(&netbk->net_schedule_list))
@@ -1848,9 +1526,6 @@ static int xen_netbk_kthread(void *data)
static int __init netback_init(void)
{
int i;
-#ifdef HAVE_XEN_PAGE_FOREIGN
- struct page *page;
-#endif
int rc = 0;
int group;
@@ -1874,32 +1549,6 @@ static int __init netback_init(void)
netbk->net_timer.data = (unsigned long)netbk;
netbk->net_timer.function = xen_netbk_alarm;
-#ifdef HAVE_XEN_PAGE_FOREIGN
- init_timer(&netbk->netbk_tx_pending_timer);
- netbk->netbk_tx_pending_timer.data = (unsigned long)netbk;
- netbk->netbk_tx_pending_timer.function =
- xen_netbk_tx_pending_timeout;
-#endif
-
-#ifdef HAVE_XEN_PAGE_FOREIGN
- netbk->mmap_pages =
- alloc_empty_pages_and_pagevec(MAX_PENDING_REQS);
- if (!netbk->mmap_pages) {
- printk(KERN_ALERT "%s: out of memory\n", __func__);
- del_timer(&netbk->netbk_tx_pending_timer);
- del_timer(&netbk->net_timer);
- rc = -ENOMEM;
- goto failed_init;
- }
-
- for (i = 0; i < MAX_PENDING_REQS; i++) {
- page = netbk->mmap_pages[i];
- SetPageForeign(page, netif_page_release);
- set_page_ext(page, netbk, i);
- INIT_LIST_HEAD(&netbk->pending_inuse[i].list);
- }
-#endif
-
netbk->pending_cons = 0;
netbk->pending_prod = MAX_PENDING_REQS;
for (i = 0; i < MAX_PENDING_REQS; i++)
@@ -1912,11 +1561,6 @@ static int __init netback_init(void)
if (IS_ERR(netbk->task)) {
printk(KERN_ALERT "kthread_run() fails at netback\n");
-#ifdef HAVE_XEN_PAGE_FOREIGN
- free_empty_pages_and_pagevec(netbk->mmap_pages,
- MAX_PENDING_REQS);
- del_timer(&netbk->netbk_tx_pending_timer);
-#endif
del_timer(&netbk->net_timer);
rc = PTR_ERR(netbk->task);
goto failed_init;
@@ -1924,9 +1568,6 @@ static int __init netback_init(void)
kthread_bind(netbk->task, group);
-#ifdef HAVE_XEN_PAGE_FOREIGN
- INIT_LIST_HEAD(&netbk->pending_inuse_head);
-#endif
INIT_LIST_HEAD(&netbk->net_schedule_list);
spin_lock_init(&netbk->net_schedule_list_lock);
@@ -1936,17 +1577,6 @@ static int __init netback_init(void)
wake_up_process(netbk->task);
}
-#ifdef HAVE_XEN_PAGE_FOREIGN
- netbk_copy_skb_mode = NETBK_DONT_COPY_SKB;
- if (MODPARM_copy_skb) {
- if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_and_replace,
- NULL, 0))
- netbk_copy_skb_mode = NETBK_ALWAYS_COPY_SKB;
- else
- netbk_copy_skb_mode = NETBK_DELAYED_COPY_SKB;
- }
-#endif
-
rc = xenvif_xenbus_init();
if (rc)
goto failed_init;
@@ -1956,17 +1586,11 @@ static int __init netback_init(void)
failed_init:
for (i = 0; i < group; i++) {
struct xen_netbk *netbk = &xen_netbk[i];
-#ifdef HAVE_XEN_PAGE_FOREIGN
- free_empty_pages_and_pagevec(netbk->mmap_pages,
- MAX_PENDING_REQS);
- del_timer(&netbk->netbk_tx_pending_timer);
-#else
int j;
for (j = 0; j < MAX_PENDING_REQS; j++) {
if (netbk->mmap_pages[j])
__free_page(netbk->mmap_pages[j]);
}
-#endif
del_timer(&netbk->net_timer);
kthread_stop(netbk->task);
}
diff --git a/drivers/net/xen-netback/xenbus.c b/drivers/net/xen-netback/xenbus.c
index d56b1ee..22b8c35 100644
--- a/drivers/net/xen-netback/xenbus.c
+++ b/drivers/net/xen-netback/xenbus.c
@@ -74,10 +74,6 @@ static int netback_probe(struct xenbus_device *dev,
dev_set_drvdata(&dev->dev, be);
sg = 1;
-#ifdef HAVE_XEN_PAGE_FOREIGN
- if (netbk_copy_skb_mode == NETBK_ALWAYS_COPY_SKB)
- sg = 0;
-#endif
do {
err = xenbus_transaction_start(&xbt);
--
1.7.4